From 2410013d9382b8129702fa3a3bf19a370ae7afc3 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Tue, 16 May 2017 19:47:09 +0000
Subject: [PATCH 1/9] Vendor import of clang trunk r303197:
 https://llvm.org/svn/llvm-project/cfe/trunk@303197

---
 CMakeLists.txt                                |  13 +-
 cmake/caches/Apple-stage2.cmake               |   7 +-
 cmake/caches/DistributionExample.cmake        |  13 +-
 docs/CMakeLists.txt                           |   2 +-
 docs/ClangFormatStyleOptions.rst              |  52 +-
 docs/ThreadSafetyAnalysis.rst                 |   4 +-
 include/clang-c/Index.h                       |  19 +-
 include/clang/AST/CXXInheritance.h            |   3 +-
 include/clang/AST/Decl.h                      |  11 +-
 include/clang/AST/DeclCXX.h                   |  25 +-
 include/clang/AST/ExternalASTMerger.h         |   2 +
 include/clang/AST/RecursiveASTVisitor.h       |   2 +-
 include/clang/Basic/Attr.td                   |  24 +
 include/clang/Basic/Builtins.def              |   3 +
 include/clang/Basic/DiagnosticGroups.td       |   2 +
 include/clang/Basic/DiagnosticSemaKinds.td    |  28 +
 include/clang/Basic/TargetOptions.h           |   3 +-
 include/clang/Driver/Options.td               |   3 +
 include/clang/Driver/SanitizerArgs.h          |   1 +
 include/clang/Format/Format.h                 |  51 +-
 include/clang/Frontend/CodeGenOptions.def     |   2 +
 include/clang/Lex/MacroInfo.h                 |  21 -
 include/clang/Lex/Preprocessor.h              |  15 -
 include/clang/Parse/Parser.h                  |   7 +-
 include/clang/Sema/Sema.h                     |  41 +-
 include/clang/Tooling/RefactoringCallbacks.h  |  50 ++
 lib/AST/ASTImporter.cpp                       |  14 +
 lib/AST/ASTStructuralEquivalence.cpp          |   5 +
 lib/AST/CXXInheritance.cpp                    | 114 ++-
 lib/AST/DeclCXX.cpp                           |   5 +-
 lib/AST/ExternalASTMerger.cpp                 |   6 +
 lib/AST/ODRHash.cpp                           |   2 +-
 lib/AST/Stmt.cpp                              |   2 +-
 lib/AST/Type.cpp                              |   7 +-
 lib/Basic/Targets.cpp                         |   5 +
 lib/CodeGen/BackendUtil.cpp                   |   5 +-
 lib/CodeGen/CGBlocks.cpp                      |  13 +-
 lib/CodeGen/CGBuiltin.cpp                     |  34 +-
 lib/CodeGen/CGCUDANV.cpp                      |   2 +-
 lib/CodeGen/CGCleanup.cpp                     |   3 +-
 lib/CodeGen/CGDebugInfo.cpp                   |  23 +-
 lib/CodeGen/CGDebugInfo.h                     |   5 +-
 lib/CodeGen/CGDecl.cpp                        |  13 +-
 lib/CodeGen/CGException.cpp                   |  13 +-
 lib/CodeGen/CGExpr.cpp                        |   4 +-
 lib/CodeGen/CGExprAgg.cpp                     |  20 +-
 lib/CodeGen/CGExprConstant.cpp                |  10 +-
 lib/CodeGen/CGExprScalar.cpp                  |   8 +-
 lib/CodeGen/CGObjCGNU.cpp                     | 134 ++--
 lib/CodeGen/CGObjCMac.cpp                     | 170 ++---
 lib/CodeGen/CGOpenMPRuntime.cpp               |   2 +-
 lib/CodeGen/CodeGenAction.cpp                 |  10 +-
 lib/CodeGen/CodeGenModule.cpp                 |   2 +-
 lib/CodeGen/CodeGenTypes.cpp                  |   2 +-
 lib/CodeGen/ItaniumCXXABI.cpp                 |   2 +-
 lib/CodeGen/TargetInfo.cpp                    |  24 +-
 lib/Driver/SanitizerArgs.cpp                  |  33 +-
 lib/Driver/ToolChains/Arch/Mips.cpp           |  16 +-
 lib/Driver/ToolChains/Myriad.cpp              |   8 +-
 lib/Driver/ToolChains/WebAssembly.cpp         |   5 +-
 lib/Format/ContinuationIndenter.cpp           |  16 +
 lib/Format/Format.cpp                         |  26 +-
 lib/Format/TokenAnnotator.cpp                 |  34 +-
 lib/Format/UnwrappedLineParser.cpp            |  34 +-
 lib/Format/WhitespaceManager.cpp              |  11 +-
 lib/Frontend/CompilerInvocation.cpp           |   2 +
 lib/Headers/avxintrin.h                       | 184 +++--
 lib/Headers/emmintrin.h                       | 281 ++++---
 lib/Headers/intrin.h                          |   6 -
 lib/Headers/mmintrin.h                        | 101 ++-
 lib/Headers/opencl-c.h                        |  14 +-
 lib/Headers/pmmintrin.h                       |   8 +-
 lib/Headers/prfchwintrin.h                    |   6 +-
 lib/Headers/smmintrin.h                       |  10 +-
 lib/Headers/tmmintrin.h                       |  88 ++-
 lib/Headers/x86intrin.h                       |   2 -
 lib/Headers/xmmintrin.h                       |  28 +-
 lib/Index/IndexBody.cpp                       |  47 ++
 lib/Index/IndexDecl.cpp                       |  30 +
 lib/Index/IndexTypeSourceInfo.cpp             |  30 +-
 lib/Index/IndexingContext.cpp                 |  15 +
 lib/Lex/MacroInfo.cpp                         |   2 -
 lib/Lex/ModuleMap.cpp                         |  20 +-
 lib/Lex/PPDirectives.cpp                      |  27 +-
 lib/Lex/Preprocessor.cpp                      |   7 +-
 lib/Parse/ParseDecl.cpp                       |  11 +-
 lib/Parse/ParseDeclCXX.cpp                    |   8 +-
 lib/Parse/ParseExpr.cpp                       |  45 +-
 lib/Parse/ParseExprCXX.cpp                    |   7 +-
 lib/Parse/ParseTemplate.cpp                   |  16 +-
 lib/Sema/CMakeLists.txt                       |   1 +
 lib/Sema/Sema.cpp                             |  32 +-
 lib/Sema/SemaCast.cpp                         |   3 +-
 lib/Sema/SemaCodeComplete.cpp                 |  92 ++-
 lib/Sema/SemaDecl.cpp                         | 259 +++++--
 lib/Sema/SemaDeclAttr.cpp                     |  13 +-
 lib/Sema/SemaExpr.cpp                         | 242 +++++-
 lib/Sema/SemaExprCXX.cpp                      |  29 +-
 lib/Sema/SemaExprObjC.cpp                     |   3 +-
 lib/Sema/SemaInit.cpp                         |  40 +-
 lib/Sema/SemaLookup.cpp                       |  52 +-
 lib/Sema/SemaOverload.cpp                     |   6 +-
 lib/Sema/SemaStmt.cpp                         |  54 +-
 lib/Sema/SemaTemplate.cpp                     | 196 +++--
 lib/Sema/SemaTemplateInstantiateDecl.cpp      |  14 +
 lib/Sema/SemaType.cpp                         |   5 +-
 lib/Serialization/ASTReader.cpp               |   3 +-
 lib/Serialization/ASTWriter.cpp               |   1 -
 .../Checkers/BasicObjCFoundationChecks.cpp    |  46 +-
 .../Checkers/BuiltinFunctionChecker.cpp       |  16 +
 .../Checkers/NoReturnFunctionChecker.cpp      |   4 +-
 .../Checkers/RetainCountChecker.cpp           |  47 +-
 lib/StaticAnalyzer/Checkers/SelectorExtras.h  |  40 +-
 .../Checkers/StdLibraryFunctionsChecker.cpp   |   5 +-
 .../Core/ExprEngineCallAndReturn.cpp          |   3 +-
 lib/Tooling/RefactoringCallbacks.cpp          | 156 +++-
 test/Analysis/builtin-assume.c                |   8 +
 test/CXX/drs/dr20xx.cpp                       |  30 +
 test/CXX/drs/dr4xx.cpp                        |   7 +-
 test/CodeCompletion/member-access.cpp         |  80 ++
 test/CodeGen/asan-globals-gc.cpp              |  15 +-
 test/CodeGen/asan-no-globals-no-comdat.cpp    |  11 +
 test/CodeGen/mips-aggregate-arg.c             |  38 +
 test/CodeGen/sanitize-recover.c               |  10 +-
 test/CodeGen/sparcv8-inline-asm.c             |  11 +
 test/CodeGen/thinlto_backend.ll               |   6 +-
 test/CodeGen/x86_64-mno-sse.c                 |  15 +
 test/CodeGen/xray-customevent.cpp             |  28 +
 test/CodeGenCXX/array-default-argument.cpp    |  36 +
 .../CodeGenCXX/linetable-virtual-variadic.cpp |   6 +-
 test/CodeGenCXX/vla.cpp                       |  59 +-
 test/CodeGenObjC/arc-blocks.m                 |  10 +
 test/CodeGenObjC/arc-foreach.m                |   6 +-
 ...amdgpu-debug-info-pointer-address-space.cl |  20 +-
 .../amdgpu-debug-info-variable-expression.cl  |  30 +-
 .../constant-addr-space-globals.cl            |   8 +-
 test/Driver/fsanitize.c                       |   9 +-
 test/Driver/myriad-toolchain.c                |   4 +-
 test/Driver/wasm-toolchain.c                  |   6 +-
 test/FixIt/fixit-availability.c               |   2 +-
 test/FixIt/fixit-availability.mm              |  42 +-
 test/Import/conflicting-struct/Inputs/S1.cpp  |   6 +
 test/Import/conflicting-struct/Inputs/S2.cpp  |   7 +
 test/Import/conflicting-struct/test.cpp       |   7 +
 test/Index/Core/index-dependent-source.cpp    | 143 ++++
 test/Index/Core/index-instantiated-source.cpp |  39 +
 test/Index/Core/index-source.cpp              |  52 ++
 test/Index/complete-available.m               |  20 +
 test/Index/get-cursor.m                       |  11 +
 test/Misc/warning-flags.c                     |   4 +-
 test/Modules/DebugInfoNamespace.cpp           |  19 +
 test/Modules/DebugInfoSubmoduleImport.c       |   5 +
 test/Modules/Inputs/DebugInfoNamespace/A.h    |   3 +
 test/Modules/Inputs/DebugInfoNamespace/B.h    |   3 +
 .../DebugInfoNamespace/module.modulemap       |   8 +
 .../Frameworks/Sub.framework/Headers/B.h      |   1 +
 .../Frameworks/Sub.framework/Headers/Sub.h    |   2 +
 .../Sub.framework/PrivateHeaders/BPriv.h      |   1 +
 .../Sub.framework/PrivateHeaders/SubPriv.h    |   1 +
 .../Inputs/MainA.framework/Headers/A.h        |   1 +
 .../Inputs/MainA.framework/Headers/Main.h     |   2 +
 .../MainA.framework/Modules/module.modulemap  |  12 +
 .../Modules/module.private.modulemap          |  12 +
 .../MainA.framework/PrivateHeaders/APriv.h    |   1 +
 .../MainA.framework/PrivateHeaders/MainPriv.h |   1 +
 test/Modules/Inputs/SameHeader/A.h            |   3 +
 test/Modules/Inputs/SameHeader/B.h            |   4 +
 test/Modules/Inputs/SameHeader/C.h            |  12 +
 .../Inputs/SameHeader/module.modulemap        |  11 +
 test/Modules/find-privateheaders.m            |  13 +-
 test/Modules/odr_hash.cpp                     |  33 +
 test/Modules/preprocess-module.cpp            |   5 +
 test/Modules/redefinition-same-header.m       |  20 +
 test/OpenMP/report_default_DSA.cpp            |  18 +
 test/PCH/cxx-templates.cpp                    |   8 +
 test/PCH/cxx-templates.h                      |   3 +
 test/Parser/objc-available.m                  |   6 +
 test/Preprocessor/predefined-arch-macros.c    |   8 +
 test/Preprocessor/x86_target_features.c       |   8 +
 test/Sema/overloadable.c                      |  15 +
 test/Sema/redefinition-same-header.c          |  14 +
 test/Sema/typo-correction.c                   |   7 +
 test/Sema/vector-cast.c                       |   5 +-
 test/Sema/vector-gcc-compat.c                 | 330 +++++++++
 test/Sema/vector-gcc-compat.cpp               | 328 +++++++++
 test/Sema/vector-ops.c                        | 140 ++--
 test/Sema/zvector.c                           |  24 +-
 test/SemaCXX/constructor-initializer.cpp      |  19 +
 test/SemaCXX/cxx1y-generic-lambdas.cpp        |   7 +
 .../cxx1y-variable-templates_top_level.cpp    |   7 +-
 test/SemaCXX/enable_if.cpp                    |  14 +
 test/SemaCXX/for-range-examples.cpp           |  34 +
 test/SemaCXX/invalid-member-expr.cpp          |   4 +-
 test/SemaCXX/modules-ts.cppm                  |   3 +-
 test/SemaCXX/type-traits.cpp                  |   2 +-
 test/SemaCXX/typo-correction.cpp              |  11 +-
 test/SemaCXX/vector-no-lax.cpp                |   2 +-
 test/SemaCXX/warn-unused-filescoped.cpp       |  18 +-
 test/SemaObjC/method-bad-param.m              |   6 +
 test/SemaObjC/unguarded-availability.m        |  21 +-
 test/SemaObjCXX/interface-return-type.mm      |   7 +
 test/SemaObjCXX/is-base-of.mm                 |  25 +
 test/SemaOpenCL/array-init.cl                 |  20 +
 test/SemaOpenCL/storageclass.cl               |  11 +-
 test/SemaTemplate/deduction-crash.cpp         |   2 +-
 test/SemaTemplate/default-arguments.cpp       |  16 +
 test/SemaTemplate/explicit-instantiation.cpp  |   2 +-
 .../explicit-specialization-member.cpp        |  11 +-
 .../ms-lookup-template-base-classes.cpp       |   3 +-
 test/SemaTemplate/typo-template-name.cpp      |  43 ++
 tools/c-index-test/c-index-test.c             |  13 +
 tools/clang-import-test/clang-import-test.cpp |  46 +-
 tools/libclang/CIndex.cpp                     |  29 +
 tools/libclang/libclang.exports               |   1 +
 unittests/Format/FormatTest.cpp               | 104 ++-
 unittests/Format/FormatTestJS.cpp             |  69 +-
 unittests/Format/FormatTestSelective.cpp      |   4 +-
 unittests/Tooling/RecursiveASTVisitorTest.cpp |   8 +
 .../Tooling/RefactoringCallbacksTest.cpp      |  91 ++-
 www/cxx_dr_status.html                        | 690 +++++++++---------
 220 files changed, 5201 insertions(+), 1618 deletions(-)
 create mode 100644 test/Analysis/builtin-assume.c
 create mode 100644 test/CXX/drs/dr20xx.cpp
 create mode 100644 test/CodeGen/asan-no-globals-no-comdat.cpp
 create mode 100644 test/CodeGen/mips-aggregate-arg.c
 create mode 100644 test/CodeGen/sparcv8-inline-asm.c
 create mode 100644 test/CodeGen/x86_64-mno-sse.c
 create mode 100644 test/CodeGen/xray-customevent.cpp
 create mode 100644 test/CodeGenCXX/array-default-argument.cpp
 create mode 100644 test/Import/conflicting-struct/Inputs/S1.cpp
 create mode 100644 test/Import/conflicting-struct/Inputs/S2.cpp
 create mode 100644 test/Import/conflicting-struct/test.cpp
 create mode 100644 test/Index/Core/index-dependent-source.cpp
 create mode 100644 test/Index/Core/index-instantiated-source.cpp
 create mode 100644 test/Index/complete-available.m
 create mode 100644 test/Modules/DebugInfoNamespace.cpp
 create mode 100644 test/Modules/Inputs/DebugInfoNamespace/A.h
 create mode 100644 test/Modules/Inputs/DebugInfoNamespace/B.h
 create mode 100644 test/Modules/Inputs/DebugInfoNamespace/module.modulemap
 create mode 100644 test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/Headers/B.h
 create mode 100644 test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/Headers/Sub.h
 create mode 100644 test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/PrivateHeaders/BPriv.h
 create mode 100644 test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/PrivateHeaders/SubPriv.h
 create mode 100644 test/Modules/Inputs/MainA.framework/Headers/A.h
 create mode 100644 test/Modules/Inputs/MainA.framework/Headers/Main.h
 create mode 100644 test/Modules/Inputs/MainA.framework/Modules/module.modulemap
 create mode 100644 test/Modules/Inputs/MainA.framework/Modules/module.private.modulemap
 create mode 100644 test/Modules/Inputs/MainA.framework/PrivateHeaders/APriv.h
 create mode 100644 test/Modules/Inputs/MainA.framework/PrivateHeaders/MainPriv.h
 create mode 100644 test/Modules/Inputs/SameHeader/A.h
 create mode 100644 test/Modules/Inputs/SameHeader/B.h
 create mode 100644 test/Modules/Inputs/SameHeader/C.h
 create mode 100644 test/Modules/Inputs/SameHeader/module.modulemap
 create mode 100644 test/Modules/redefinition-same-header.m
 create mode 100644 test/OpenMP/report_default_DSA.cpp
 create mode 100644 test/Sema/redefinition-same-header.c
 create mode 100644 test/Sema/vector-gcc-compat.c
 create mode 100644 test/Sema/vector-gcc-compat.cpp
 create mode 100644 test/SemaObjCXX/interface-return-type.mm
 create mode 100644 test/SemaObjCXX/is-base-of.mm
 create mode 100644 test/SemaOpenCL/array-init.cl
 create mode 100644 test/SemaTemplate/typo-template-name.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ca696b1ce79f..9e43a103b2bc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -579,10 +579,17 @@ if (CLANG_ENABLE_BOOTSTRAP)
     add_dependencies(clang-bootstrap-deps compiler-rt)
   endif()
 
+  set(C_COMPILER "clang")
+  set(CXX_COMPILER "clang++")
+  if(WIN32)
+    set(C_COMPILER "clang-cl.exe")
+    set(CXX_COMPILER "clang-cl.exe")
+  endif()
+
   set(COMPILER_OPTIONS
-    -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++
-    -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang
-    -DCMAKE_ASM_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
+    -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/${CXX_COMPILER}
+    -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/${C_COMPILER}
+    -DCMAKE_ASM_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/${C_COMPILER})
 
   if(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
     add_dependencies(clang-bootstrap-deps llvm-profdata)
diff --git a/cmake/caches/Apple-stage2.cmake b/cmake/caches/Apple-stage2.cmake
index 11c595c1530f..f07973dc0ab2 100644
--- a/cmake/caches/Apple-stage2.cmake
+++ b/cmake/caches/Apple-stage2.cmake
@@ -13,6 +13,7 @@ set(CLANG_LINKS_TO_CREATE clang++ cc c++ CACHE STRING "")
 set(CMAKE_MACOSX_RPATH ON CACHE BOOL "")
 set(LLVM_ENABLE_ZLIB ON CACHE BOOL "")
 set(LLVM_ENABLE_BACKTRACES OFF CACHE BOOL "")
+set(LLVM_ENABLE_MODULES ON CACHE BOOL "")
 set(LLVM_EXTERNALIZE_DEBUGINFO ON CACHE BOOL "")
 set(CLANG_PLUGIN_SUPPORT OFF CACHE BOOL "")
 set(BUG_REPORT_URL "http://developer.apple.com/bugreporter/" CACHE STRING "")
@@ -28,8 +29,10 @@ set(LLVM_BUILD_TESTS ON CACHE BOOL "")
 set(LLVM_ENABLE_LTO ON CACHE BOOL "")
 set(CMAKE_C_FLAGS "-fno-stack-protector -fno-common -Wno-profile-instr-unprofiled" CACHE STRING "")
 set(CMAKE_CXX_FLAGS "-fno-stack-protector -fno-common -Wno-profile-instr-unprofiled" CACHE STRING "")
-set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -gline-tables-only -DNDEBUG" CACHE STRING "")
-set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -gline-tables-only -DNDEBUG" CACHE STRING "")
+if(LLVM_ENABLE_LTO AND NOT LLVM_ENABLE_LTO STREQUAL "THIN")
+  set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -gline-tables-only -DNDEBUG" CACHE STRING "")
+  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -gline-tables-only -DNDEBUG" CACHE STRING "")
+endif()
 set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
 
 set(LIBCXX_INSTALL_LIBRARY OFF CACHE BOOL "")
diff --git a/cmake/caches/DistributionExample.cmake b/cmake/caches/DistributionExample.cmake
index 862f547c1656..551f4ee07eac 100644
--- a/cmake/caches/DistributionExample.cmake
+++ b/cmake/caches/DistributionExample.cmake
@@ -29,6 +29,13 @@ set(CLANG_BOOTSTRAP_TARGETS
 
 # Setup the bootstrap build.
 set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "")
-set(CLANG_BOOTSTRAP_CMAKE_ARGS
-  -C ${CMAKE_CURRENT_LIST_DIR}/DistributionExample-stage2.cmake
-  CACHE STRING "")
+
+if(STAGE2_CACHE_FILE)
+  set(CLANG_BOOTSTRAP_CMAKE_ARGS
+    -C ${STAGE2_CACHE_FILE}
+    CACHE STRING "")
+else()
+  set(CLANG_BOOTSTRAP_CMAKE_ARGS
+    -C ${CMAKE_CURRENT_LIST_DIR}/DistributionExample-stage2.cmake
+    CACHE STRING "")
+endif()
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index 13b79fdfa534..d2956c18f80c 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -91,8 +91,8 @@ endif()
 endif()
 
 if (LLVM_ENABLE_SPHINX)
+  include(AddSphinxTarget)
   if (SPHINX_FOUND)
-    include(AddSphinxTarget)
     if (${SPHINX_OUTPUT_HTML})
       add_sphinx_target(html clang)
       add_custom_command(TARGET docs-clang-html POST_BUILD
diff --git a/docs/ClangFormatStyleOptions.rst b/docs/ClangFormatStyleOptions.rst
index ed628e370087..f54acd9b81dd 100644
--- a/docs/ClangFormatStyleOptions.rst
+++ b/docs/ClangFormatStyleOptions.rst
@@ -209,23 +209,45 @@ the configuration (without a prefix: ``Auto``).
     float       b = 23;
     std::string ccc = 23;
 
-**AlignEscapedNewlinesLeft** (``bool``)
-  If ``true``, aligns escaped newlines as far left as possible.
-  Otherwise puts them into the right-most column.
+**AlignEscapedNewlines** (``EscapedNewlineAlignmentStyle``)
+  Options for aligning backslashes in escaped newlines.
 
-  .. code-block:: c++
+  Possible values:
+
+  * ``ENAS_DontAlign`` (in configuration: ``DontAlign``)
+    Don't align escaped newlines.
+
+    .. code-block:: c++
+
+      #define A \
+        int aaaa; \
+        int b; \
+        int dddddddddd;
+
+  * ``ENAS_Left`` (in configuration: ``Left``)
+    Align escaped newlines as far left as possible.
+
+    .. code-block:: c++
+
+      true:
+      #define A   \
+        int aaaa; \
+        int b;    \
+        int dddddddddd;
+
+      false:
+
+  * ``ENAS_Right`` (in configuration: ``Right``)
+    Align escaped newlines in the right-most column.
+
+    .. code-block:: c++
+
+      #define A                                                                      \
+        int aaaa;                                                                    \
+        int b;                                                                       \
+        int dddddddddd;
 
-    true:
-    #define A   \
-      int aaaa; \
-      int b;    \
-      int dddddddddd;
 
-    false:
-    #define A                                                                      \
-      int aaaa;                                                                    \
-      int b;                                                                       \
-      int dddddddddd;
 
 **AlignOperands** (``bool``)
   If ``true``, horizontally align operands of binary and ternary
@@ -1525,7 +1547,7 @@ the configuration (without a prefix: ``Auto``).
     Use C++03-compatible syntax.
 
   * ``LS_Cpp11`` (in configuration: ``Cpp11``)
-    Use features of C++11, C++14 and C++1z (e.g. ``A<A<int>>`` instead of 
+    Use features of C++11, C++14 and C++1z (e.g. ``A<A<int>>`` instead of
     ``A<A<int> >``).
 
   * ``LS_Auto`` (in configuration: ``Auto``)
diff --git a/docs/ThreadSafetyAnalysis.rst b/docs/ThreadSafetyAnalysis.rst
index 47cf4ef3994e..ea8e98a1884b 100644
--- a/docs/ThreadSafetyAnalysis.rst
+++ b/docs/ThreadSafetyAnalysis.rst
@@ -884,11 +884,11 @@ implementation.
 
   // Deprecated.
   #define PT_GUARDED_VAR \
-    THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded)
+    THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_var)
 
   // Deprecated.
   #define GUARDED_VAR \
-    THREAD_ANNOTATION_ATTRIBUTE__(guarded)
+    THREAD_ANNOTATION_ATTRIBUTE__(guarded_var)
 
   // Replaced by REQUIRES
   #define EXCLUSIVE_LOCKS_REQUIRED(...) \
diff --git a/include/clang-c/Index.h b/include/clang-c/Index.h
index c50ac1b6d248..462d9269f7ad 100644
--- a/include/clang-c/Index.h
+++ b/include/clang-c/Index.h
@@ -32,7 +32,7 @@
  * compatible, thus CINDEX_VERSION_MAJOR is expected to remain stable.
  */
 #define CINDEX_VERSION_MAJOR 0
-#define CINDEX_VERSION_MINOR 38
+#define CINDEX_VERSION_MINOR 39
 
 #define CINDEX_VERSION_ENCODE(major, minor) ( \
       ((major) * 10000)                       \
@@ -4080,6 +4080,23 @@ CINDEX_LINKAGE unsigned clang_Cursor_isObjCOptional(CXCursor C);
  */
 CINDEX_LINKAGE unsigned clang_Cursor_isVariadic(CXCursor C);
 
+/**
+ * \brief Returns non-zero if the given cursor points to a symbol marked with
+ * external_source_symbol attribute.
+ *
+ * \param language If non-NULL, and the attribute is present, will be set to
+ * the 'language' string from the attribute.
+ *
+ * \param definedIn If non-NULL, and the attribute is present, will be set to
+ * the 'definedIn' string from the attribute.
+ *
+ * \param isGenerated If non-NULL, and the attribute is present, will be set to
+ * non-zero if the 'generated_declaration' is set in the attribute.
+ */
+CINDEX_LINKAGE unsigned clang_Cursor_isExternalSymbol(CXCursor C,
+                                       CXString *language, CXString *definedIn,
+                                       unsigned *isGenerated);
+
 /**
  * \brief Given a cursor that represents a declaration, return the associated
  * comment's source range.  The range may include multiple consecutive comments
diff --git a/include/clang/AST/CXXInheritance.h b/include/clang/AST/CXXInheritance.h
index 3cf058f26bc6..a7961ebe8ce6 100644
--- a/include/clang/AST/CXXInheritance.h
+++ b/include/clang/AST/CXXInheritance.h
@@ -161,7 +161,8 @@ class CXXBasePaths {
   void ComputeDeclsFound();
 
   bool lookupInBases(ASTContext &Context, const CXXRecordDecl *Record,
-                     CXXRecordDecl::BaseMatchesCallback BaseMatches);
+                     CXXRecordDecl::BaseMatchesCallback BaseMatches,
+                     bool LookupInDependent = false);
 
 public:
   typedef std::list<CXXBasePath>::iterator paths_iterator;
diff --git a/include/clang/AST/Decl.h b/include/clang/AST/Decl.h
index 573ea55de1fd..facef8e55f7a 100644
--- a/include/clang/AST/Decl.h
+++ b/include/clang/AST/Decl.h
@@ -966,9 +966,16 @@ class VarDecl : public DeclaratorDecl, public Redeclarable<VarDecl> {
   /// hasLocalStorage - Returns true if a variable with function scope
   ///  is a non-static local variable.
   bool hasLocalStorage() const {
-    if (getStorageClass() == SC_None)
+    if (getStorageClass() == SC_None) {
+      // OpenCL v1.2 s6.5.3: The __constant or constant address space name is
+      // used to describe variables allocated in global memory and which are
+      // accessed inside a kernel(s) as read-only variables. As such, variables
+      // in constant address space cannot have local storage.
+      if (getType().getAddressSpace() == LangAS::opencl_constant)
+        return false;
       // Second check is for C++11 [dcl.stc]p4.
       return !isFileVarDecl() && getTSCSpec() == TSCS_unspecified;
+    }
 
     // Global Named Register (GNU extension)
     if (getStorageClass() == SC_Register && !isLocalVarDeclOrParm())
@@ -2478,7 +2485,7 @@ class FieldDecl : public DeclaratorDecl, public Mergeable<FieldDecl> {
   void setCapturedVLAType(const VariableArrayType *VLAType);
 
   /// getParent - Returns the parent of this field declaration, which
-  /// is the struct in which this method is defined.
+  /// is the struct in which this field is defined.
   const RecordDecl *getParent() const {
     return cast<RecordDecl>(getDeclContext());
   }
diff --git a/include/clang/AST/DeclCXX.h b/include/clang/AST/DeclCXX.h
index 13921a132cfb..6965e8143ff6 100644
--- a/include/clang/AST/DeclCXX.h
+++ b/include/clang/AST/DeclCXX.h
@@ -1563,10 +1563,13 @@ class CXXRecordDecl : public RecordDecl {
   /// \param Paths used to record the paths from this class to its base class
   /// subobjects that match the search criteria.
   ///
+  /// \param LookupInDependent can be set to true to extend the search to
+  /// dependent base classes.
+  ///
   /// \returns true if there exists any path from this class to a base class
   /// subobject that matches the search criteria.
-  bool lookupInBases(BaseMatchesCallback BaseMatches,
-                     CXXBasePaths &Paths) const;
+  bool lookupInBases(BaseMatchesCallback BaseMatches, CXXBasePaths &Paths,
+                     bool LookupInDependent = false) const;
 
   /// \brief Base-class lookup callback that determines whether the given
   /// base class specifier refers to a specific class declaration.
@@ -1607,6 +1610,16 @@ class CXXRecordDecl : public RecordDecl {
   static bool FindOrdinaryMember(const CXXBaseSpecifier *Specifier,
                                  CXXBasePath &Path, DeclarationName Name);
 
+  /// \brief Base-class lookup callback that determines whether there exists
+  /// a member with the given name.
+  ///
+  /// This callback can be used with \c lookupInBases() to find members
+  /// of the given name within a C++ class hierarchy, including dependent
+  /// classes.
+  static bool
+  FindOrdinaryMemberInDependentClasses(const CXXBaseSpecifier *Specifier,
+                                       CXXBasePath &Path, DeclarationName Name);
+
   /// \brief Base-class lookup callback that determines whether there exists
   /// an OpenMP declare reduction member with the given name.
   ///
@@ -1633,6 +1646,14 @@ class CXXRecordDecl : public RecordDecl {
   /// \brief Get the indirect primary bases for this class.
   void getIndirectPrimaryBases(CXXIndirectPrimaryBaseSet& Bases) const;
 
+  /// Performs an imprecise lookup of a dependent name in this class.
+  ///
+  /// This function does not follow strict semantic rules and should be used
+  /// only when lookup rules can be relaxed, e.g. indexing.
+  std::vector<const NamedDecl *>
+  lookupDependentName(const DeclarationName &Name,
+                      llvm::function_ref<bool(const NamedDecl *ND)> Filter);
+
   /// Renders and displays an inheritance diagram
   /// for this C++ class and all of its base classes (transitively) using
   /// GraphViz.
diff --git a/include/clang/AST/ExternalASTMerger.h b/include/clang/AST/ExternalASTMerger.h
index 51d0c30ad23b..55459df1fe6b 100644
--- a/include/clang/AST/ExternalASTMerger.h
+++ b/include/clang/AST/ExternalASTMerger.h
@@ -44,6 +44,8 @@ class ExternalASTMerger : public ExternalASTSource {
   FindExternalLexicalDecls(const DeclContext *DC,
                            llvm::function_ref<bool(Decl::Kind)> IsKindWeWant,
                            SmallVectorImpl<Decl *> &Result) override;
+
+   void CompleteType(TagDecl *Tag) override;
 };
 
 } // end namespace clang
diff --git a/include/clang/AST/RecursiveASTVisitor.h b/include/clang/AST/RecursiveASTVisitor.h
index 1b5850a05b37..cd2a39449825 100644
--- a/include/clang/AST/RecursiveASTVisitor.h
+++ b/include/clang/AST/RecursiveASTVisitor.h
@@ -2326,7 +2326,7 @@ DEF_TRAVERSE_STMT(LambdaExpr, {
   }
 
   TypeLoc TL = S->getCallOperator()->getTypeSourceInfo()->getTypeLoc();
-  FunctionProtoTypeLoc Proto = TL.castAs<FunctionProtoTypeLoc>();
+  FunctionProtoTypeLoc Proto = TL.getAsAdjusted<FunctionProtoTypeLoc>();
 
   if (S->hasExplicitParameters() && S->hasExplicitResultType()) {
     // Visit the whole type.
diff --git a/include/clang/Basic/Attr.td b/include/clang/Basic/Attr.td
index 3eeeb1bdc971..4eb958e3f4d5 100644
--- a/include/clang/Basic/Attr.td
+++ b/include/clang/Basic/Attr.td
@@ -652,6 +652,30 @@ def Availability : InheritableAttr {
              .Case("tvos_app_extension", "tvOS (App Extension)")
              .Case("watchos_app_extension", "watchOS (App Extension)")
              .Default(llvm::StringRef());
+}
+static llvm::StringRef getPlatformNameSourceSpelling(llvm::StringRef Platform) {
+    return llvm::StringSwitch<llvm::StringRef>(Platform)
+             .Case("ios", "iOS")
+             .Case("macos", "macOS")
+             .Case("tvos", "tvOS")
+             .Case("watchos", "watchOS")
+             .Case("ios_app_extension", "iOSApplicationExtension")
+             .Case("macos_app_extension", "macOSApplicationExtension")
+             .Case("tvos_app_extension", "tvOSApplicationExtension")
+             .Case("watchos_app_extension", "watchOSApplicationExtension")
+             .Default(Platform);
+}
+static llvm::StringRef canonicalizePlatformName(llvm::StringRef Platform) {
+    return llvm::StringSwitch<llvm::StringRef>(Platform)
+             .Case("iOS", "ios")
+             .Case("macOS", "macos")
+             .Case("tvOS", "tvos")
+             .Case("watchOS", "watchos")
+             .Case("iOSApplicationExtension", "ios_app_extension")
+             .Case("macOSApplicationExtension", "macos_app_extension")
+             .Case("tvOSApplicationExtension", "tvos_app_extension")
+             .Case("watchOSApplicationExtension", "watchos_app_extension")
+             .Default(Platform);
 } }];
   let HasCustomParsing = 1;
   let DuplicatesAllowedWhileMerging = 1;
diff --git a/include/clang/Basic/Builtins.def b/include/clang/Basic/Builtins.def
index 816ea156f979..a9ec172422ab 100644
--- a/include/clang/Basic/Builtins.def
+++ b/include/clang/Basic/Builtins.def
@@ -1409,6 +1409,9 @@ LANGBUILTIN(to_private, "v*v*", "tn", OCLC20_LANG)
 BUILTIN(__builtin_os_log_format_buffer_size, "zcC*.", "p:0:nut")
 BUILTIN(__builtin_os_log_format, "v*v*cC*.", "p:0:nt")
 
+// Builtins for XRay
+BUILTIN(__xray_customevent, "vcC*z", "")
+
 #undef BUILTIN
 #undef LIBBUILTIN
 #undef LANGBUILTIN
diff --git a/include/clang/Basic/DiagnosticGroups.td b/include/clang/Basic/DiagnosticGroups.td
index 05e03fab40fa..e1a41584023c 100644
--- a/include/clang/Basic/DiagnosticGroups.td
+++ b/include/clang/Basic/DiagnosticGroups.td
@@ -486,6 +486,7 @@ def UnneededInternalDecl : DiagGroup<"unneeded-internal-declaration">;
 def UnneededMemberFunction : DiagGroup<"unneeded-member-function">;
 def UnusedPrivateField : DiagGroup<"unused-private-field">;
 def UnusedFunction : DiagGroup<"unused-function", [UnneededInternalDecl]>;
+def UnusedTemplate : DiagGroup<"unused-template", [UnneededInternalDecl]>;
 def UnusedMemberFunction : DiagGroup<"unused-member-function",
                                      [UnneededMemberFunction]>;
 def UnusedLabel : DiagGroup<"unused-label">;
@@ -627,6 +628,7 @@ def Conversion : DiagGroup<"conversion",
 def Unused : DiagGroup<"unused",
                        [UnusedArgument, UnusedFunction, UnusedLabel,
                         // UnusedParameter, (matches GCC's behavior)
+                        // UnusedTemplate, (clean-up libc++ before enabling)
                         // UnusedMemberFunction, (clean-up llvm before enabling)
                         UnusedPrivateField, UnusedLambdaCapture,
                         UnusedLocalTypedef, UnusedValue, UnusedVariable,
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td
index a0c0e5f86449..1db6704f6d1f 100644
--- a/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/include/clang/Basic/DiagnosticSemaKinds.td
@@ -303,6 +303,8 @@ def note_empty_parens_zero_initialize : Note<
   "replace parentheses with an initializer to declare a variable">;
 def warn_unused_function : Warning<"unused function %0">,
   InGroup<UnusedFunction>, DefaultIgnore;
+def warn_unused_template : Warning<"unused %select{function|variable}0 template %1">,
+  InGroup<UnusedTemplate>, DefaultIgnore;
 def warn_unused_member_function : Warning<"unused member function %0">,
   InGroup<UnusedMemberFunction>, DefaultIgnore;
 def warn_used_but_marked_unused: Warning<"%0 was marked unused but was used">,
@@ -2463,6 +2465,9 @@ def err_attribute_invalid_size : Error<
   "vector size not an integral multiple of component size">;
 def err_attribute_zero_size : Error<"zero vector size">;
 def err_attribute_size_too_large : Error<"vector size too large">;
+def err_typecheck_vector_not_convertable_implict_truncation : Error<
+   "cannot convert between %select{scalar|vector}0 type %1 and vector type"
+   " %2 as implicit conversion would cause truncation">;
 def err_typecheck_vector_not_convertable : Error<
   "cannot convert between vector values of different size (%0 and %1)">;
 def err_typecheck_vector_not_convertable_non_scalar : Error<
@@ -4607,6 +4612,8 @@ def err_abi_tag_on_redeclaration : Error<
   "cannot add 'abi_tag' attribute in a redeclaration">;
 def err_new_abi_tag_on_redeclaration : Error<
   "'abi_tag' %0 missing in original declaration">;
+def note_use_ifdef_guards : Note<
+  "unguarded header; consider using #ifdef guards or #pragma once">;
 
 def note_deleted_dtor_no_operator_delete : Note<
   "virtual destructor requires an unambiguous, accessible 'operator delete'">;
@@ -5775,6 +5782,9 @@ def err_objc_object_assignment : Error<
   "cannot assign to class object (%0 invalid)">;
 def err_typecheck_invalid_operands : Error<
   "invalid operands to binary expression (%0 and %1)">;
+def err_typecheck_logical_vector_expr_gnu_cpp_restrict : Error<
+  "logical expression with vector %select{type %1 and non-vector type %2|types"
+  " %1 and %2}0 is only supported in C++">;
 def err_typecheck_sub_ptr_compatible : Error<
   "%diff{$ and $ are not pointers to compatible types|"
   "pointers to incompatible types}0,1">;
@@ -8186,9 +8196,20 @@ def err_undeclared_use_suggest : Error<
   "use of undeclared %0; did you mean %1?">;
 def err_undeclared_var_use_suggest : Error<
   "use of undeclared identifier %0; did you mean %1?">;
+def err_no_template : Error<"no template named %0">;
 def err_no_template_suggest : Error<"no template named %0; did you mean %1?">;
+def err_no_member_template : Error<"no template named %0 in %1">;
 def err_no_member_template_suggest : Error<
   "no template named %0 in %1; did you mean %select{|simply }2%3?">;
+def err_non_template_in_template_id : Error<
+  "%0 does not name a template but is followed by template arguments">;
+def err_non_template_in_template_id_suggest : Error<
+  "%0 does not name a template but is followed by template arguments; "
+  "did you mean %1?">;
+def err_non_template_in_member_template_id_suggest : Error<
+  "member %0 of %1 is not a template; did you mean %select{|simply }2%3?">;
+def note_non_template_in_template_id_found : Note<
+  "non-template declaration found by name lookup">;
 def err_mem_init_not_member_or_class_suggest : Error<
   "initializer %0 does not name a non-static data member or base "
   "class; did you mean the %select{base class|member}1 %2?">;
@@ -8875,6 +8896,13 @@ def ext_equivalent_internal_linkage_decl_in_modules : ExtWarn<
   InGroup<DiagGroup<"modules-ambiguous-internal-linkage">>;
 def note_equivalent_internal_linkage_decl : Note<
   "declared here%select{ in module '%1'|}0">;
+
+def note_redefinition_modules_same_file : Note<
+	"'%0' included multiple times, additional include site in header from module '%1'">;
+def note_redefinition_modules_same_file_modulemap : Note<
+	"consider adding '%0' as part of '%1' definition">;
+def note_redefinition_include_same_file : Note<
+	"'%0' included multiple times, additional include site here">;
 }
 
 let CategoryName = "Coroutines Issue" in {
diff --git a/include/clang/Basic/TargetOptions.h b/include/clang/Basic/TargetOptions.h
index 2889cce5963b..6ca1ba39c8fb 100644
--- a/include/clang/Basic/TargetOptions.h
+++ b/include/clang/Basic/TargetOptions.h
@@ -24,8 +24,7 @@ namespace clang {
 /// \brief Options for controlling the target.
 class TargetOptions {
 public:
-  /// If given, the name of the target triple to compile for. If not given the
-  /// target will be selected to match the host.
+  /// The name of the target triple to compile for.
   std::string Triple;
 
   /// When compiling for the device side, contains the triple used to compile
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td
index 31015228f362..d812bd8ec032 100644
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -827,6 +827,9 @@ def fno_sanitize_address_use_after_scope : Flag<["-"], "fno-sanitize-address-use
                                            Group<f_clang_Group>,
                                            Flags<[CoreOption, DriverOption]>,
                                            HelpText<"Disable use-after-scope detection in AddressSanitizer">;
+def fsanitize_address_globals_dead_stripping : Flag<["-"], "fsanitize-address-globals-dead-stripping">,
+                                        Group<f_clang_Group>,
+                                        HelpText<"Enable linker dead stripping of globals in AddressSanitizer">;
 def fsanitize_recover : Flag<["-"], "fsanitize-recover">, Group<f_clang_Group>;
 def fno_sanitize_recover : Flag<["-"], "fno-sanitize-recover">,
                            Flags<[CoreOption, DriverOption]>,
diff --git a/include/clang/Driver/SanitizerArgs.h b/include/clang/Driver/SanitizerArgs.h
index c7b3e8006dd5..a9645d463fa1 100644
--- a/include/clang/Driver/SanitizerArgs.h
+++ b/include/clang/Driver/SanitizerArgs.h
@@ -35,6 +35,7 @@ class SanitizerArgs {
   int AsanFieldPadding = 0;
   bool AsanSharedRuntime = false;
   bool AsanUseAfterScope = true;
+  bool AsanGlobalsDeadStripping = false;
   bool LinkCXXRuntimes = false;
   bool NeedPIE = false;
   bool Stats = false;
diff --git a/include/clang/Format/Format.h b/include/clang/Format/Format.h
index 9bed253baca2..a963c6369aa9 100644
--- a/include/clang/Format/Format.h
+++ b/include/clang/Format/Format.h
@@ -98,22 +98,39 @@ struct FormatStyle {
   /// \endcode
   bool AlignConsecutiveDeclarations;
 
-  /// \brief If ``true``, aligns escaped newlines as far left as possible.
-  /// Otherwise puts them into the right-most column.
-  /// \code
-  ///   true:
-  ///   #define A   \
-  ///     int aaaa; \
-  ///     int b;    \
-  ///     int dddddddddd;
-  ///
-  ///   false:
-  ///   #define A                                                                      \
-  ///     int aaaa;                                                                    \
-  ///     int b;                                                                       \
-  ///     int dddddddddd;
-  /// \endcode
-  bool AlignEscapedNewlinesLeft;
+  /// \brief Different styles for aligning escaped newlines.
+  enum EscapedNewlineAlignmentStyle {
+    /// \brief Don't align escaped newlines.
+    /// \code
+    ///   #define A \
+    ///     int aaaa; \
+    ///     int b; \
+    ///     int dddddddddd;
+    /// \endcode
+    ENAS_DontAlign,
+    /// \brief Align escaped newlines as far left as possible.
+    ///
+    /// The backslashes share the left-most column that fits every escaped
+    /// line of the definition.
+    /// \code
+    ///   #define A   \
+    ///     int aaaa; \
+    ///     int b;    \
+    ///     int dddddddddd;
+    /// \endcode
+    ENAS_Left,
+    /// \brief Align escaped newlines in the right-most column.
+    /// \code
+    ///   #define A                                                                      \
+    ///     int aaaa;                                                                    \
+    ///     int b;                                                                       \
+    ///     int dddddddddd;
+    /// \endcode
+    ENAS_Right,
+  };
+
+  /// \brief Options for aligning backslashes in escaped newlines.
+  EscapedNewlineAlignmentStyle AlignEscapedNewlines;
 
   /// \brief If ``true``, horizontally align operands of binary and ternary
   /// expressions.
@@ -1347,7 +1364,7 @@ struct FormatStyle {
            AlignAfterOpenBracket == R.AlignAfterOpenBracket &&
            AlignConsecutiveAssignments == R.AlignConsecutiveAssignments &&
            AlignConsecutiveDeclarations == R.AlignConsecutiveDeclarations &&
-           AlignEscapedNewlinesLeft == R.AlignEscapedNewlinesLeft &&
+           AlignEscapedNewlines == R.AlignEscapedNewlines &&
            AlignOperands == R.AlignOperands &&
            AlignTrailingComments == R.AlignTrailingComments &&
            AllowAllParametersOfDeclarationOnNextLine ==
diff --git a/include/clang/Frontend/CodeGenOptions.def b/include/clang/Frontend/CodeGenOptions.def
index 7495ad808c99..251441d38ff8 100644
--- a/include/clang/Frontend/CodeGenOptions.def
+++ b/include/clang/Frontend/CodeGenOptions.def
@@ -137,6 +137,8 @@ CODEGENOPT(StructPathTBAA    , 1, 0) ///< Whether or not to use struct-path TBAA
 CODEGENOPT(SaveTempLabels    , 1, 0) ///< Save temporary labels.
 CODEGENOPT(SanitizeAddressUseAfterScope , 1, 0) ///< Enable use-after-scope detection
                                                 ///< in AddressSanitizer
+CODEGENOPT(SanitizeAddressGlobalsDeadStripping, 1, 0) ///< Enable linker dead stripping
+                                                      ///< of globals in AddressSanitizer
 CODEGENOPT(SanitizeMemoryTrackOrigins, 2, 0) ///< Enable tracking origins in
                                              ///< MemorySanitizer
 CODEGENOPT(SanitizeMemoryUseAfterDtor, 1, 0) ///< Enable use-after-delete detection
diff --git a/include/clang/Lex/MacroInfo.h b/include/clang/Lex/MacroInfo.h
index 44b7b2e4a474..7da1e7b41ab8 100644
--- a/include/clang/Lex/MacroInfo.h
+++ b/include/clang/Lex/MacroInfo.h
@@ -105,9 +105,6 @@ class MacroInfo {
   /// \brief Must warn if the macro is unused at the end of translation unit.
   bool IsWarnIfUnused : 1;
 
-  /// \brief Whether this macro info was loaded from an AST file.
-  bool FromASTFile : 1;
-
   /// \brief Whether this macro was used as header guard.
   bool UsedForHeaderGuard : 1;
 
@@ -264,34 +261,16 @@ class MacroInfo {
     IsDisabled = true;
   }
 
-  /// \brief Determine whether this macro info came from an AST file (such as
-  /// a precompiled header or module) rather than having been parsed.
-  bool isFromASTFile() const { return FromASTFile; }
-
   /// \brief Determine whether this macro was used for a header guard.
   bool isUsedForHeaderGuard() const { return UsedForHeaderGuard; }
 
   void setUsedForHeaderGuard(bool Val) { UsedForHeaderGuard = Val; }
 
-  /// \brief Retrieve the global ID of the module that owns this particular
-  /// macro info.
-  unsigned getOwningModuleID() const {
-    if (isFromASTFile())
-      return *(const unsigned *)(this + 1);
-
-    return 0;
-  }
-
   void dump() const;
 
 private:
   unsigned getDefinitionLengthSlow(const SourceManager &SM) const;
 
-  void setOwningModuleID(unsigned ID) {
-    assert(isFromASTFile());
-    *(unsigned *)(this + 1) = ID;
-  }
-
   friend class Preprocessor;
 };
 
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h
index 0e3f563785d4..114bf70ad59a 100644
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -644,14 +644,6 @@ class Preprocessor {
   /// of that list.
   MacroInfoChain *MIChainHead;
 
-  struct DeserializedMacroInfoChain {
-    MacroInfo MI;
-    unsigned OwningModuleID; // MUST be immediately after the MacroInfo object
-                     // so it can be accessed by MacroInfo::getOwningModuleID().
-    DeserializedMacroInfoChain *Next;
-  };
-  DeserializedMacroInfoChain *DeserialMIChainHead;
-
   void updateOutOfDateIdentifier(IdentifierInfo &II) const;
 
 public:
@@ -1669,10 +1661,6 @@ class Preprocessor {
   /// \brief Allocate a new MacroInfo object with the provided SourceLocation.
   MacroInfo *AllocateMacroInfo(SourceLocation L);
 
-  /// \brief Allocate a new MacroInfo object loaded from an AST file.
-  MacroInfo *AllocateDeserializedMacroInfo(SourceLocation L,
-                                           unsigned SubModuleID);
-
   /// \brief Turn the specified lexer token into a fully checked and spelled
   /// filename, e.g. as an operand of \#include. 
   ///
@@ -1764,9 +1752,6 @@ class Preprocessor {
   /// macro name.
   void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
 
-  /// \brief Allocate a new MacroInfo object.
-  MacroInfo *AllocateMacroInfo();
-
   DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
                                                SourceLocation Loc);
   UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h
index 8d0935dec1b6..f5a7e02941a7 100644
--- a/include/clang/Parse/Parser.h
+++ b/include/clang/Parse/Parser.h
@@ -1488,6 +1488,8 @@ class Parser : public CodeCompletionHandler {
             K == tok::plusplus || K == tok::minusminus);
   }
 
+  bool diagnoseUnknownTemplateId(ExprResult TemplateName, SourceLocation Less);
+
   ExprResult ParsePostfixExpressionSuffix(ExprResult LHS);
   ExprResult ParseUnaryExprOrTypeTraitExpression();
   ExprResult ParseBuiltinPrimaryExpression();
@@ -2723,10 +2725,7 @@ class Parser : public CodeCompletionHandler {
   bool ParseGreaterThanInTemplateList(SourceLocation &RAngleLoc,
                                       bool ConsumeLastToken,
                                       bool ObjCGenericList);
-  bool ParseTemplateIdAfterTemplateName(TemplateTy Template,
-                                        SourceLocation TemplateNameLoc,
-                                        const CXXScopeSpec &SS,
-                                        bool ConsumeLastToken,
+  bool ParseTemplateIdAfterTemplateName(bool ConsumeLastToken,
                                         SourceLocation &LAngleLoc,
                                         TemplateArgList &TemplateArgs,
                                         SourceLocation &RAngleLoc);
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h
index e5961079f7c2..e910be14f969 100644
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -1074,6 +1074,10 @@ class Sema {
   /// correctly named definition after the renamed definition.
   llvm::SmallPtrSet<const NamedDecl *, 4> TypoCorrectedFunctionDefinitions;
 
+  /// Stack of types that correspond to the parameter entities that are
+  /// currently being copy-initialized. Can be empty.
+  llvm::SmallVector<QualType, 4> CurrentParameterCopyTypes;
+
   void ReadMethodPool(Selector Sel);
   void updateOutOfDateSelector(Selector Sel);
 
@@ -1456,6 +1460,11 @@ class Sema {
   /// The modules we're currently parsing.
   llvm::SmallVector<ModuleScope, 16> ModuleScopes;
 
+  /// Get the module whose scope we are currently within.
+  Module *getCurrentModule() const {
+    return ModuleScopes.empty() ? nullptr : ModuleScopes.back().Module;
+  }
+
   VisibleModuleSet VisibleModules;
 
   Module *CachedFakeTopLevelModule;
@@ -1466,7 +1475,7 @@ class Sema {
 
   /// \brief Make a merged definition of an existing hidden definition \p ND
   /// visible at the specified location.
-  void makeMergedDefinitionVisible(NamedDecl *ND, SourceLocation Loc);
+  void makeMergedDefinitionVisible(NamedDecl *ND);
 
   bool isModuleVisible(Module *M) { return VisibleModules.isVisible(M); }
 
@@ -1593,7 +1602,7 @@ class Sema {
                                Scope *S,
                                CXXScopeSpec *SS,
                                ParsedType &SuggestedType,
-                               bool AllowClassTemplates = false);
+                               bool IsTemplateName = false);
 
   /// Attempt to behave like MSVC in situations where lookup of an unqualified
   /// type name has failed in a dependent context. In these situations, we
@@ -1738,6 +1747,23 @@ class Sema {
   TemplateNameKindForDiagnostics
   getTemplateNameKindForDiagnostics(TemplateName Name);
 
+  /// Determine whether it's plausible that E was intended to be a
+  /// template-name.
+  bool mightBeIntendedToBeTemplateName(ExprResult E) {
+    if (!getLangOpts().CPlusPlus || E.isInvalid())
+      return false;
+    if (auto *DRE = dyn_cast<DeclRefExpr>(E.get()))
+      return !DRE->hasExplicitTemplateArgs();
+    if (auto *ME = dyn_cast<MemberExpr>(E.get()))
+      return !ME->hasExplicitTemplateArgs();
+    // Any additional cases recognized here should also be handled by
+    // diagnoseExprIntendedAsTemplateName.
+    return false;
+  }
+  void diagnoseExprIntendedAsTemplateName(Scope *S, ExprResult TemplateName,
+                                          SourceLocation Less,
+                                          SourceLocation Greater);
+
   Decl *ActOnDeclarator(Scope *S, Declarator &D);
 
   NamedDecl *HandleDeclarator(Scope *S, Declarator &D,
@@ -2336,6 +2362,7 @@ class Sema {
   void MergeVarDeclTypes(VarDecl *New, VarDecl *Old, bool MergeTypeWithOld);
   void MergeVarDeclExceptionSpecs(VarDecl *New, VarDecl *Old);
   bool checkVarDeclRedefinition(VarDecl *OldDefn, VarDecl *NewDefn);
+  void notePreviousDefinition(SourceLocation Old, SourceLocation New);
   bool MergeCXXFunctionDecl(FunctionDecl *New, FunctionDecl *Old, Scope *S);
 
   // AssignmentAction - This is used by all the assignment diagnostic functions
@@ -2726,7 +2753,8 @@ class Sema {
   resolveAddressOfOnlyViableOverloadCandidate(Expr *E,
                                               DeclAccessPair &FoundResult);
 
-  bool resolveAndFixAddressOfOnlyViableOverloadCandidate(ExprResult &SrcExpr);
+  bool resolveAndFixAddressOfOnlyViableOverloadCandidate(
+      ExprResult &SrcExpr, bool DoFunctionPointerConversion = false);
 
   FunctionDecl *
   ResolveSingleFunctionTemplateSpecialization(OverloadExpr *ovl,
@@ -3049,7 +3077,8 @@ class Sema {
                           bool IncludeGlobalScope = true);
   void LookupVisibleDecls(DeclContext *Ctx, LookupNameKind Kind,
                           VisibleDeclConsumer &Consumer,
-                          bool IncludeGlobalScope = true);
+                          bool IncludeGlobalScope = true,
+                          bool IncludeDependentBases = false);
 
   enum CorrectTypoKind {
     CTK_NonError,     // CorrectTypo used in a non error recovery situation.
@@ -6084,6 +6113,7 @@ class Sema {
                          TemplateArgumentListInfo *ExplicitTemplateArgs,
                                            LookupResult &Previous);
   bool CheckMemberSpecialization(NamedDecl *Member, LookupResult &Previous);
+  void CompleteMemberSpecialization(NamedDecl *Member, LookupResult &Previous);
 
   DeclResult
   ActOnExplicitInstantiation(Scope *S,
@@ -9259,6 +9289,8 @@ class Sema {
   /// type checking binary operators (subroutines of CreateBuiltinBinOp).
   QualType InvalidOperands(SourceLocation Loc, ExprResult &LHS,
                            ExprResult &RHS);
+  QualType InvalidLogicalVectorOperands(SourceLocation Loc, ExprResult &LHS,
+                                 ExprResult &RHS);
   QualType CheckPointerToMemberOperands( // C++ 5.5
     ExprResult &LHS, ExprResult &RHS, ExprValueKind &VK,
     SourceLocation OpLoc, bool isIndirect);
@@ -10008,6 +10040,7 @@ class Sema {
                                              MacroInfo *MacroInfo,
                                              unsigned Argument);
   void CodeCompleteNaturalLanguage();
+  void CodeCompleteAvailabilityPlatformName();
   void GatherGlobalCodeCompletions(CodeCompletionAllocator &Allocator,
                                    CodeCompletionTUInfo &CCTUInfo,
                   SmallVectorImpl<CodeCompletionResult> &Results);
diff --git a/include/clang/Tooling/RefactoringCallbacks.h b/include/clang/Tooling/RefactoringCallbacks.h
index 6ef9ea11f0ae..9862951149a3 100644
--- a/include/clang/Tooling/RefactoringCallbacks.h
+++ b/include/clang/Tooling/RefactoringCallbacks.h
@@ -47,6 +47,33 @@ class RefactoringCallback : public ast_matchers::MatchFinder::MatchCallback {
   Replacements Replace;
 };
 
+/// \brief Adaptor between \c ast_matchers::MatchFinder and \c
+/// tooling::RefactoringTool.
+///
+/// Runs AST matchers and stores the \c tooling::Replacements in a map.
+class ASTMatchRefactorer {
+public:
+  explicit ASTMatchRefactorer(
+    std::map<std::string, Replacements> &FileToReplaces);
+
+  template <typename T>
+  void addMatcher(const T &Matcher, RefactoringCallback *Callback) {
+    MatchFinder.addMatcher(Matcher, Callback);
+    Callbacks.push_back(Callback);
+  }
+
+  void addDynamicMatcher(const ast_matchers::internal::DynTypedMatcher &Matcher,
+                         RefactoringCallback *Callback);
+
+  std::unique_ptr<ASTConsumer> newASTConsumer();
+
+private:
+  friend class RefactoringASTConsumer;
+  std::vector<RefactoringCallback *> Callbacks;
+  ast_matchers::MatchFinder MatchFinder;
+  std::map<std::string, Replacements> &FileToReplaces;
+};
+
 /// \brief Replace the text of the statement bound to \c FromId with the text in
 /// \c ToText.
 class ReplaceStmtWithText : public RefactoringCallback {
@@ -59,6 +86,29 @@ class ReplaceStmtWithText : public RefactoringCallback {
   std::string ToText;
 };
 
+/// \brief Replace the text of an AST node bound to \c FromId with the result of
+/// evaluating the template in \c ToTemplate.
+///
+/// Expressions of the form ${NodeName} in \c ToTemplate will be
+/// replaced by the text of the node bound to the id NodeName. The string
+/// "$$" will be replaced by "$".
+class ReplaceNodeWithTemplate : public RefactoringCallback {
+public:
+  static llvm::Expected<std::unique_ptr<ReplaceNodeWithTemplate>>
+  create(StringRef FromId, StringRef ToTemplate);
+  void run(const ast_matchers::MatchFinder::MatchResult &Result) override;
+
+private:
+  struct TemplateElement {
+    enum { Literal, Identifier } Type;
+    std::string Value;
+  };
+  ReplaceNodeWithTemplate(llvm::StringRef FromId,
+                          std::vector<TemplateElement> Template);
+  std::string FromId;
+  std::vector<TemplateElement> Template;
+};
+
 /// \brief Replace the text of the statement bound to \c FromId with the text of
 /// the statement bound to \c ToId.
 class ReplaceStmtWithStmt : public RefactoringCallback {
diff --git a/lib/AST/ASTImporter.cpp b/lib/AST/ASTImporter.cpp
index 4fb6051d6f58..847638b7bbeb 100644
--- a/lib/AST/ASTImporter.cpp
+++ b/lib/AST/ASTImporter.cpp
@@ -1622,10 +1622,18 @@ Decl *ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
 
   // We may already have a record of the same name; try to find and match it.
   RecordDecl *AdoptDecl = nullptr;
+  RecordDecl *PrevDecl = nullptr;
   if (!DC->isFunctionOrMethod()) {
     SmallVector<NamedDecl *, 4> ConflictingDecls;
     SmallVector<NamedDecl *, 2> FoundDecls;
     DC->getRedeclContext()->localUncachedLookup(SearchName, FoundDecls);
+
+    if (!FoundDecls.empty()) {
+      // We're going to have to compare D against potentially conflicting Decls, so complete it.
+      if (D->hasExternalLexicalStorage() && !D->isCompleteDefinition())
+        D->getASTContext().getExternalSource()->CompleteType(D);
+    }
+
     for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
       if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
         continue;
@@ -1652,6 +1660,8 @@ Decl *ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
           }
         }
 
+        PrevDecl = FoundRecord;
+
         if (RecordDecl *FoundDef = FoundRecord->getDefinition()) {
           if ((SearchName && !D->isCompleteDefinition())
               || (D->isCompleteDefinition() &&
@@ -1744,6 +1754,10 @@ Decl *ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
     LexicalDC->addDeclInternal(D2);
     if (D->isAnonymousStructOrUnion())
       D2->setAnonymousStructOrUnion(true);
+    if (PrevDecl) {
+      // FIXME: do this for all Redeclarables, not just RecordDecls.
+      D2->setPreviousDecl(PrevDecl);
+    }
   }
   
   Importer.Imported(D, D2);
diff --git a/lib/AST/ASTStructuralEquivalence.cpp b/lib/AST/ASTStructuralEquivalence.cpp
index 8fe72eac4133..9376ee1d4ee4 100644
--- a/lib/AST/ASTStructuralEquivalence.cpp
+++ b/lib/AST/ASTStructuralEquivalence.cpp
@@ -855,6 +855,11 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
 
   if (CXXRecordDecl *D1CXX = dyn_cast<CXXRecordDecl>(D1)) {
     if (CXXRecordDecl *D2CXX = dyn_cast<CXXRecordDecl>(D2)) {
+      if (D1CXX->hasExternalLexicalStorage() &&
+          !D1CXX->isCompleteDefinition()) {
+        D1CXX->getASTContext().getExternalSource()->CompleteType(D1CXX);
+      }
+
       if (D1CXX->getNumBases() != D2CXX->getNumBases()) {
         if (Context.Complain) {
           Context.Diag2(D2->getLocation(), diag::warn_odr_tag_type_inconsistent)
diff --git a/lib/AST/CXXInheritance.cpp b/lib/AST/CXXInheritance.cpp
index 56fb0464078f..746602d47be5 100644
--- a/lib/AST/CXXInheritance.cpp
+++ b/lib/AST/CXXInheritance.cpp
@@ -13,6 +13,7 @@
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclTemplate.h"
 #include "clang/AST/RecordLayout.h"
 #include "llvm/ADT/SetVector.h"
 #include <algorithm>
@@ -174,9 +175,10 @@ bool CXXRecordDecl::forallBases(ForallBasesCallback BaseMatches,
   return AllMatches;
 }
 
-bool CXXBasePaths::lookupInBases(
-    ASTContext &Context, const CXXRecordDecl *Record,
-    CXXRecordDecl::BaseMatchesCallback BaseMatches) {
+bool CXXBasePaths::lookupInBases(ASTContext &Context,
+                                 const CXXRecordDecl *Record,
+                                 CXXRecordDecl::BaseMatchesCallback BaseMatches,
+                                 bool LookupInDependent) {
   bool FoundPath = false;
 
   // The access of the path down to this record.
@@ -194,7 +196,7 @@ bool CXXBasePaths::lookupInBases(
     //   the base class scope is not examined during unqualified name lookup 
     //   either at the point of definition of the class template or member or 
     //   during an instantiation of the class tem- plate or member.
-    if (BaseType->isDependentType())
+    if (!LookupInDependent && BaseType->isDependentType())
       continue;
     
     // Determine whether we need to visit this base class at all,
@@ -262,10 +264,28 @@ bool CXXBasePaths::lookupInBases(
         return FoundPath;
       }
     } else if (VisitBase) {
-      CXXRecordDecl *BaseRecord
-        = cast<CXXRecordDecl>(BaseSpec.getType()->castAs<RecordType>()
-                                ->getDecl());
-      if (lookupInBases(Context, BaseRecord, BaseMatches)) {
+      CXXRecordDecl *BaseRecord;
+      if (LookupInDependent) {
+        BaseRecord = nullptr;
+        const TemplateSpecializationType *TST =
+            BaseSpec.getType()->getAs<TemplateSpecializationType>();
+        if (!TST) {
+          if (auto *RT = BaseSpec.getType()->getAs<RecordType>())
+            BaseRecord = cast<CXXRecordDecl>(RT->getDecl());
+        } else {
+          TemplateName TN = TST->getTemplateName();
+          if (auto *TD =
+                  dyn_cast_or_null<ClassTemplateDecl>(TN.getAsTemplateDecl()))
+            BaseRecord = TD->getTemplatedDecl();
+        }
+        if (BaseRecord && !BaseRecord->hasDefinition())
+          BaseRecord = nullptr;
+      } else {
+        BaseRecord = cast<CXXRecordDecl>(
+            BaseSpec.getType()->castAs<RecordType>()->getDecl());
+      }
+      if (BaseRecord &&
+          lookupInBases(Context, BaseRecord, BaseMatches, LookupInDependent)) {
         // C++ [class.member.lookup]p2:
         //   A member name f in one sub-object B hides a member name f in
         //   a sub-object A if A is a base class sub-object of B. Any
@@ -299,9 +319,11 @@ bool CXXBasePaths::lookupInBases(
 }
 
 bool CXXRecordDecl::lookupInBases(BaseMatchesCallback BaseMatches,
-                                  CXXBasePaths &Paths) const {
+                                  CXXBasePaths &Paths,
+                                  bool LookupInDependent) const {
   // If we didn't find anything, report that.
-  if (!Paths.lookupInBases(getASTContext(), this, BaseMatches))
+  if (!Paths.lookupInBases(getASTContext(), this, BaseMatches,
+                           LookupInDependent))
     return false;
 
   // If we're not recording paths or we won't ever find ambiguities,
@@ -387,23 +409,49 @@ bool CXXRecordDecl::FindTagMember(const CXXBaseSpecifier *Specifier,
   return false;
 }
 
-bool CXXRecordDecl::FindOrdinaryMember(const CXXBaseSpecifier *Specifier, 
-                                       CXXBasePath &Path,
-                                       DeclarationName Name) {
-  RecordDecl *BaseRecord =
-    Specifier->getType()->castAs<RecordType>()->getDecl();
-  
-  const unsigned IDNS = IDNS_Ordinary | IDNS_Tag | IDNS_Member;
+static bool findOrdinaryMember(RecordDecl *BaseRecord, CXXBasePath &Path,
+                               DeclarationName Name) {
+  const unsigned IDNS = clang::Decl::IDNS_Ordinary | clang::Decl::IDNS_Tag |
+                        clang::Decl::IDNS_Member;
   for (Path.Decls = BaseRecord->lookup(Name);
        !Path.Decls.empty();
        Path.Decls = Path.Decls.slice(1)) {
     if (Path.Decls.front()->isInIdentifierNamespace(IDNS))
       return true;
   }
-  
+
   return false;
 }
 
+bool CXXRecordDecl::FindOrdinaryMember(const CXXBaseSpecifier *Specifier,
+                                       CXXBasePath &Path,
+                                       DeclarationName Name) {
+  RecordDecl *BaseRecord =
+      Specifier->getType()->castAs<RecordType>()->getDecl();
+  return findOrdinaryMember(BaseRecord, Path, Name);
+}
+
+bool CXXRecordDecl::FindOrdinaryMemberInDependentClasses(
+    const CXXBaseSpecifier *Specifier, CXXBasePath &Path,
+    DeclarationName Name) {
+  const TemplateSpecializationType *TST =
+      Specifier->getType()->getAs<TemplateSpecializationType>();
+  if (!TST) {
+    auto *RT = Specifier->getType()->getAs<RecordType>();
+    if (!RT)
+      return false;
+    return findOrdinaryMember(RT->getDecl(), Path, Name);
+  }
+  TemplateName TN = TST->getTemplateName();
+  const auto *TD = dyn_cast_or_null<ClassTemplateDecl>(TN.getAsTemplateDecl());
+  if (!TD)
+    return false;
+  CXXRecordDecl *RD = TD->getTemplatedDecl();
+  if (!RD)
+    return false;
+  return findOrdinaryMember(RD, Path, Name);
+}
+
 bool CXXRecordDecl::FindOMPReductionMember(const CXXBaseSpecifier *Specifier,
                                            CXXBasePath &Path,
                                            DeclarationName Name) {
@@ -438,6 +486,36 @@ FindNestedNameSpecifierMember(const CXXBaseSpecifier *Specifier,
   return false;
 }
 
+std::vector<const NamedDecl *> CXXRecordDecl::lookupDependentName(
+    const DeclarationName &Name,
+    llvm::function_ref<bool(const NamedDecl *ND)> Filter) {
+  std::vector<const NamedDecl *> Results; // Decls accepted by Filter.
+  // Look in this class first; if anything is found, bases are not searched.
+  DeclContext::lookup_result DirectResult = lookup(Name);
+  if (!DirectResult.empty()) {
+    for (const NamedDecl *ND : DirectResult) {
+      if (Filter(ND))
+        Results.push_back(ND);
+    }
+    return Results;
+  }
+  // Otherwise walk the base classes, including dependent ones.
+  CXXBasePaths Paths;
+  Paths.setOrigin(this);
+  if (!lookupInBases(
+          [&](const CXXBaseSpecifier *Specifier, CXXBasePath &Path) {
+            return CXXRecordDecl::FindOrdinaryMemberInDependentClasses(
+                Specifier, Path, Name);
+          },
+          Paths, /*LookupInDependent=*/true))
+    return Results; // No base matched: Results is still empty.
+  for (const NamedDecl *ND : Paths.front().Decls) { // First path only.
+    if (Filter(ND))
+      Results.push_back(ND);
+  }
+  return Results;
+}
+
 void OverridingMethods::add(unsigned OverriddenSubobject, 
                             UniqueVirtualMethod Overriding) {
   SmallVectorImpl<UniqueVirtualMethod> &SubobjectOverrides
diff --git a/lib/AST/DeclCXX.cpp b/lib/AST/DeclCXX.cpp
index dd8f768c5711..9f87fe12a9cd 100644
--- a/lib/AST/DeclCXX.cpp
+++ b/lib/AST/DeclCXX.cpp
@@ -1432,8 +1432,9 @@ bool CXXRecordDecl::isAnyDestructorNoReturn() const {
 
   // Check base classes destructor for noreturn.
   for (const auto &Base : bases())
-    if (Base.getType()->getAsCXXRecordDecl()->isAnyDestructorNoReturn())
-      return true;
+    if (const CXXRecordDecl *RD = Base.getType()->getAsCXXRecordDecl())
+      if (RD->isAnyDestructorNoReturn())
+        return true;
 
   // Check fields for noreturn.
   for (const auto *Field : fields())
diff --git a/lib/AST/ExternalASTMerger.cpp b/lib/AST/ExternalASTMerger.cpp
index 8849cfc3c80b..1dc472a5f753 100644
--- a/lib/AST/ExternalASTMerger.cpp
+++ b/lib/AST/ExternalASTMerger.cpp
@@ -178,3 +178,9 @@ void ExternalASTMerger::FindExternalLexicalDecls(
         }
       });
 }
+
+void ExternalASTMerger::CompleteType(TagDecl *Tag) {
+  SmallVector<Decl *, 0> Result; // Required out-parameter; never read here.
+  FindExternalLexicalDecls(Tag, [](Decl::Kind) { return true; }, Result);
+  Tag->setHasExternalLexicalStorage(false);
+}
diff --git a/lib/AST/ODRHash.cpp b/lib/AST/ODRHash.cpp
index 83168d0924f6..f4d314a6dd0d 100644
--- a/lib/AST/ODRHash.cpp
+++ b/lib/AST/ODRHash.cpp
@@ -411,7 +411,7 @@ class ODRTypeVisitor : public TypeVisitor<ODRTypeVisitor> {
 
   void VisitTypedefType(const TypedefType *T) {
     AddDecl(T->getDecl());
-    Hash.AddQualType(T->getDecl()->getUnderlyingType());
+    AddQualType(T->getDecl()->getUnderlyingType().getCanonicalType());
     VisitType(T);
   }
 };
diff --git a/lib/AST/Stmt.cpp b/lib/AST/Stmt.cpp
index 69e65f558f89..2367cadf645c 100644
--- a/lib/AST/Stmt.cpp
+++ b/lib/AST/Stmt.cpp
@@ -1112,7 +1112,7 @@ void CapturedStmt::setCapturedRegionKind(CapturedRegionKind Kind) {
 
 bool CapturedStmt::capturesVariable(const VarDecl *Var) const {
   for (const auto &I : captures()) {
-    if (!I.capturesVariable())
+    if (!I.capturesVariable() && !I.capturesVariableByCopy())
       continue;
 
     // This does not handle variable redeclarations. This should be
diff --git a/lib/AST/Type.cpp b/lib/AST/Type.cpp
index df26233b4796..22d52bcd3f31 100644
--- a/lib/AST/Type.cpp
+++ b/lib/AST/Type.cpp
@@ -2114,18 +2114,15 @@ bool QualType::isTriviallyCopyableType(const ASTContext &Context) const {
   if (hasNonTrivialObjCLifetime())
     return false;
 
-  // C++11 [basic.types]p9
+  // C++11 [basic.types]p9 - See Core 2094
   //   Scalar types, trivially copyable class types, arrays of such types, and
-  //   non-volatile const-qualified versions of these types are collectively
+  //   cv-qualified versions of these types are collectively
   //   called trivially copyable types.
 
   QualType CanonicalType = getCanonicalType();
   if (CanonicalType->isDependentType())
     return false;
 
-  if (CanonicalType.isVolatileQualified())
-    return false;
-
   // Return false for incomplete types after skipping any incomplete array types
   // which are expressly allowed by the standard and thus our API.
   if (CanonicalType->isIncompleteType())
diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index 33eb0b05ddcd..92c561aa9413 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -6862,6 +6862,11 @@ class SparcTargetInfo : public TargetInfo {
     case 'N': // Same as 'K' but zext (required for SIMode)
     case 'O': // The constant 4096
       return true;
+
+    case 'f':
+    case 'e':
+      info.setAllowsRegister();
+      return true;
     }
     return false;
   }
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 0d96f2efa60a..0f07169ac8b0 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -194,6 +194,8 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
 // where this is not a factor). Also, on ELF this feature requires an assembler
 // extension that only works with -integrated-as at the moment.
 static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) {
+  if (!CGOpts.SanitizeAddressGlobalsDeadStripping)
+    return false;
   switch (T.getObjectFormat()) {
   case Triple::MachO:
   case Triple::COFF:
@@ -1071,7 +1073,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
     // into memory and pass it into runThinLTOBackend, which will run the
     // function importer and invoke LTO passes.
     Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
-        llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile);
+        llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile,
+                                           /*IgnoreEmptyThinLTOIndexFile*/true);
     if (!IndexOrErr) {
       logAllUnhandledErrors(IndexOrErr.takeError(), errs(),
                             "Error loading index file '" +
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index 2b2a92dd6019..f1c20e9df1f3 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -878,7 +878,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
 
     // If type is const-qualified, copy the value into the block field.
     } else if (type.isConstQualified() &&
-               type.getObjCLifetime() == Qualifiers::OCL_Strong) {
+               type.getObjCLifetime() == Qualifiers::OCL_Strong &&
+               CGM.getCodeGenOpts().OptimizationLevel != 0) {
       llvm::Value *value = Builder.CreateLoad(src, "captured");
       Builder.CreateStore(value, blockField);
 
@@ -960,9 +961,8 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() {
   //   const char *signature;   // the block signature
   //   const char *layout;      // reserved
   // };
-  BlockDescriptorType =
-    llvm::StructType::create("struct.__block_descriptor",
-                             UnsignedLongTy, UnsignedLongTy, nullptr);
+  BlockDescriptorType = llvm::StructType::create(
+      "struct.__block_descriptor", UnsignedLongTy, UnsignedLongTy);
 
   // Now form a pointer to that.
   unsigned AddrSpace = 0;
@@ -986,9 +986,8 @@ llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
   //   struct __block_descriptor *__descriptor;
   // };
   GenericBlockLiteralType =
-    llvm::StructType::create("struct.__block_literal_generic",
-                             VoidPtrTy, IntTy, IntTy, VoidPtrTy,
-                             BlockDescPtrTy, nullptr);
+      llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
+                               IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
 
   return GenericBlockLiteralType;
 }
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 2f05c0e910e5..50c9e22801c7 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -2769,6 +2769,32 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
                                         Layout.size().getQuantity()));
   }
+
+  case Builtin::BI__xray_customevent: {
+    if (!ShouldXRayInstrumentFunction())
+      return RValue::getIgnored();
+    if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
+      if (XRayAttr->neverXRayInstrument())
+        return RValue::getIgnored();
+    }
+    Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
+    auto FTy = F->getFunctionType();
+    auto Arg0 = E->getArg(0);
+    auto Arg0Val = EmitScalarExpr(Arg0);
+    auto Arg0Ty = Arg0->getType();
+    auto PTy0 = FTy->getParamType(0);
+    if (PTy0 != Arg0Val->getType()) {
+      if (Arg0Ty->isArrayType())
+        Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
+      else
+        Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
+    }
+    auto Arg1 = EmitScalarExpr(E->getArg(1));
+    auto PTy1 = FTy->getParamType(1);
+    if (PTy1 != Arg1->getType())
+      Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
+    return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
+  }
   }
 
   // If this is an alias for a lib function (e.g. __builtin_sin), emit
@@ -4545,7 +4571,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
                                        ? Intrinsic::arm_stlexd
                                        : Intrinsic::arm_strexd);
-    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
+    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
 
     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
     Value *Val = EmitScalarExpr(E->getArg(0));
@@ -5375,7 +5401,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
                                        ? Intrinsic::aarch64_stlxp
                                        : Intrinsic::aarch64_stxp);
-    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
+    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
 
     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
@@ -7347,8 +7373,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     // unsigned int __cpu_type;
     // unsigned int __cpu_subtype;
     // unsigned int __cpu_features[1];
-    llvm::Type *STy = llvm::StructType::get(
-        Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
+    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
+                                            llvm::ArrayType::get(Int32Ty, 1));
 
     // Grab the global __cpu_model.
     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index 813cd7400186..d24ef0a8a974 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -265,7 +265,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
       "__cudaRegisterFatBinary");
   // struct { int magic, int version, void * gpu_binary, void * dont_care };
   llvm::StructType *FatbinWrapperTy =
-      llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy, nullptr);
+      llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
 
   llvm::Function *ModuleCtorFunc = llvm::Function::Create(
       llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index 437ab7dd4649..e8bcf0a3ac56 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -51,8 +51,7 @@ DominatingValue<RValue>::saved_type::save(CodeGenFunction &CGF, RValue rv) {
   if (rv.isComplex()) {
     CodeGenFunction::ComplexPairTy V = rv.getComplexVal();
     llvm::Type *ComplexTy =
-      llvm::StructType::get(V.first->getType(), V.second->getType(),
-                            (void*) nullptr);
+        llvm::StructType::get(V.first->getType(), V.second->getType());
     Address addr = CGF.CreateDefaultAlignTempAlloca(ComplexTy, "saved-complex");
     CGF.Builder.CreateStore(V.first,
                             CGF.Builder.CreateStructGEP(addr, 0, CharUnits()));
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index 3e5434660567..9d77c61bd52c 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -209,7 +209,7 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context,
 
   // Check namespace.
   if (const auto *NSDecl = dyn_cast<NamespaceDecl>(Context))
-    return getOrCreateNameSpace(NSDecl);
+    return getOrCreateNamespace(NSDecl);
 
   if (const auto *RDecl = dyn_cast<RecordDecl>(Context))
     if (!RDecl->isDependentType())
@@ -2860,8 +2860,8 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
 
   if (DebugKind >= codegenoptions::LimitedDebugInfo) {
     if (const NamespaceDecl *NSDecl =
-        dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext()))
-      FDContext = getOrCreateNameSpace(NSDecl);
+        dyn_cast_or_null<NamespaceDecl>(FD->getLexicalDeclContext()))
+      FDContext = getOrCreateNamespace(NSDecl);
     else if (const RecordDecl *RDecl =
              dyn_cast_or_null<RecordDecl>(FD->getDeclContext())) {
       llvm::DIScope *Mod = getParentModuleOrNull(RDecl);
@@ -3961,7 +3961,7 @@ void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) {
       CGM.getCodeGenOpts().DebugExplicitImport) {
     DBuilder.createImportedModule(
         getCurrentContextDescriptor(cast<Decl>(UD.getDeclContext())),
-        getOrCreateNameSpace(NSDecl),
+        getOrCreateNamespace(NSDecl),
         getLineNumber(UD.getLocation()));
   }
 }
@@ -4021,23 +4021,26 @@ CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) {
   else
     R = DBuilder.createImportedDeclaration(
         getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())),
-        getOrCreateNameSpace(cast<NamespaceDecl>(NA.getAliasedNamespace())),
+        getOrCreateNamespace(cast<NamespaceDecl>(NA.getAliasedNamespace())),
         getLineNumber(NA.getLocation()), NA.getName());
   VH.reset(R);
   return R;
 }
 
 llvm::DINamespace *
-CGDebugInfo::getOrCreateNameSpace(const NamespaceDecl *NSDecl) {
-  NSDecl = NSDecl->getCanonicalDecl();
-  auto I = NameSpaceCache.find(NSDecl);
-  if (I != NameSpaceCache.end())
+CGDebugInfo::getOrCreateNamespace(const NamespaceDecl *NSDecl) {
+  // Don't canonicalize the NamespaceDecl here: The DINamespace will be uniqued
+  // if necessary, and this way multiple declarations of the same namespace in
+  // different parent modules stay distinct.
+  auto I = NamespaceCache.find(NSDecl);
+  if (I != NamespaceCache.end()) // Cache hit: reuse the existing node.
     return cast<llvm::DINamespace>(I->second);
 
   llvm::DIScope *Context = getDeclContextDescriptor(NSDecl);
+  // Don't trust the context if it is a DIModule (see comment above).
   llvm::DINamespace *NS =
       DBuilder.createNameSpace(Context, NSDecl->getName(), NSDecl->isInline());
-  NameSpaceCache[NSDecl].reset(NS);
+  NamespaceCache[NSDecl].reset(NS); // Remember the node for later requests.
   return NS;
 }
 
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index 5050ca0ad3fa..7de48f278994 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -125,7 +125,7 @@ class CGDebugInfo {
   /// Cache declarations relevant to DW_TAG_imported_declarations (C++
   /// using declarations) that aren't covered by other more specific caches.
   llvm::DenseMap<const Decl *, llvm::TrackingMDRef> DeclCache;
-  llvm::DenseMap<const NamespaceDecl *, llvm::TrackingMDRef> NameSpaceCache;
+  llvm::DenseMap<const NamespaceDecl *, llvm::TrackingMDRef> NamespaceCache;
   llvm::DenseMap<const NamespaceAliasDecl *, llvm::TrackingMDRef>
       NamespaceAliasCache;
   llvm::DenseMap<const Decl *, llvm::TypedTrackingMDRef<llvm::DIDerivedType>>
@@ -194,8 +194,9 @@ class CGDebugInfo {
   getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile *F);
   /// \return debug info descriptor for vtable.
   llvm::DIType *getOrCreateVTablePtrType(llvm::DIFile *F);
+
   /// \return namespace descriptor for the given namespace decl.
-  llvm::DINamespace *getOrCreateNameSpace(const NamespaceDecl *N);
+  llvm::DINamespace *getOrCreateNamespace(const NamespaceDecl *N);
   llvm::DIType *CreatePointerLikeType(llvm::dwarf::Tag Tag, const Type *Ty,
                                       QualType PointeeTy, llvm::DIFile *F);
   llvm::DIType *getOrCreateStructPtrType(StringRef Name, llvm::DIType *&Cache);
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 10a0b46d9028..0fa8eeb1c3e1 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -152,7 +152,14 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
 /// EmitVarDecl - This method handles emission of any variable declaration
 /// inside a function, including static vars etc.
 void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
-  if (D.isStaticLocal()) {
+  if (D.hasExternalStorage())
+    // Don't emit it now, allow it to be emitted lazily on its first use.
+    return;
+
+  // Some function-scope variables are not static locals but still have
+  // non-automatic storage duration and must be emitted like static variables,
+  // e.g. a function-scope variable in the constant address space in OpenCL.
+  if (D.getStorageDuration() != SD_Automatic) {
     llvm::GlobalValue::LinkageTypes Linkage =
         CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false);
 
@@ -163,10 +170,6 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
     return EmitStaticVarDecl(D, Linkage);
   }
 
-  if (D.hasExternalStorage())
-    // Don't emit it now, allow it to be emitted lazily on its first use.
-    return;
-
   if (D.getType().getAddressSpace() == LangAS::opencl_local)
     return CGM.getOpenCLRuntime().EmitWorkGroupLocalVarDecl(*this, D);
 
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index ca1535182ec1..e65fa863fe31 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -765,8 +765,8 @@ llvm::BasicBlock *CodeGenFunction::EmitLandingPad() {
   llvm::BasicBlock *lpad = createBasicBlock("lpad");
   EmitBlock(lpad);
 
-  llvm::LandingPadInst *LPadInst = Builder.CreateLandingPad(
-      llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr), 0);
+  llvm::LandingPadInst *LPadInst =
+      Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty), 0);
 
   llvm::Value *LPadExn = Builder.CreateExtractValue(LPadInst, 0);
   Builder.CreateStore(LPadExn, getExceptionSlot());
@@ -1310,8 +1310,8 @@ llvm::BasicBlock *CodeGenFunction::getTerminateLandingPad() {
   if (!CurFn->hasPersonalityFn())
     CurFn->setPersonalityFn(getOpaquePersonalityFn(CGM, Personality));
 
-  llvm::LandingPadInst *LPadInst = Builder.CreateLandingPad(
-      llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr), 0);
+  llvm::LandingPadInst *LPadInst =
+      Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty), 0);
   LPadInst->addClause(getCatchAllValue(*this));
 
   llvm::Value *Exn = nullptr;
@@ -1387,8 +1387,7 @@ llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) {
   llvm::Value *Exn = getExceptionFromSlot();
   llvm::Value *Sel = getSelectorFromSlot();
 
-  llvm::Type *LPadType = llvm::StructType::get(Exn->getType(),
-                                               Sel->getType(), nullptr);
+  llvm::Type *LPadType = llvm::StructType::get(Exn->getType(), Sel->getType());
   llvm::Value *LPadVal = llvm::UndefValue::get(LPadType);
   LPadVal = Builder.CreateInsertValue(LPadVal, Exn, 0, "lpad.val");
   LPadVal = Builder.CreateInsertValue(LPadVal, Sel, 1, "lpad.val");
@@ -1747,7 +1746,7 @@ void CodeGenFunction::EmitSEHExceptionCodeSave(CodeGenFunction &ParentCGF,
   // };
   // int exceptioncode = exception_pointers->ExceptionRecord->ExceptionCode;
   llvm::Type *RecordTy = CGM.Int32Ty->getPointerTo();
-  llvm::Type *PtrsTy = llvm::StructType::get(RecordTy, CGM.VoidPtrTy, nullptr);
+  llvm::Type *PtrsTy = llvm::StructType::get(RecordTy, CGM.VoidPtrTy);
   llvm::Value *Ptrs = Builder.CreateBitCast(SEHInfo, PtrsTy->getPointerTo());
   llvm::Value *Rec = Builder.CreateStructGEP(PtrsTy, Ptrs, 0);
   Rec = Builder.CreateAlignedLoad(Rec, getPointerAlign());
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 863b4380da47..cef6292c0e4d 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -2859,9 +2859,9 @@ void CodeGenFunction::EmitCfiCheckFail() {
   EmitTrapCheck(DataIsNotNullPtr);
 
   llvm::StructType *SourceLocationTy =
-      llvm::StructType::get(VoidPtrTy, Int32Ty, Int32Ty, nullptr);
+      llvm::StructType::get(VoidPtrTy, Int32Ty, Int32Ty);
   llvm::StructType *CfiCheckFailDataTy =
-      llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy, nullptr);
+      llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy);
 
   llvm::Value *V = Builder.CreateConstGEP2_32(
       CfiCheckFailDataTy,
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 49bbb4808eaa..a05a088f0919 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -512,12 +512,20 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
     currentElement->addIncoming(element, entryBB);
 
     // Emit the actual filler expression.
-    LValue elementLV =
-      CGF.MakeAddrLValue(Address(currentElement, elementAlign), elementType);
-    if (filler)
-      EmitInitializationToLValue(filler, elementLV);
-    else
-      EmitNullInitializationToLValue(elementLV);
+    {
+      // C++1z [class.temporary]p5:
+      //   when a default constructor is called to initialize an element of
+      //   an array with no corresponding initializer [...] the destruction of
+      //   every temporary created in a default argument is sequenced before
+      //   the construction of the next array element, if any
+      CodeGenFunction::RunCleanupsScope CleanupsScope(CGF);
+      LValue elementLV =
+        CGF.MakeAddrLValue(Address(currentElement, elementAlign), elementType);
+      if (filler)
+        EmitInitializationToLValue(filler, elementLV);
+      else
+        EmitNullInitializationToLValue(elementLV);
+    }
 
     // Move on to the next element.
     llvm::Value *nextElement =
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index 53c184130709..6b72774c10a5 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -1361,9 +1361,8 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value,
                                         Value.getComplexIntImag());
 
     // FIXME: the target may want to specify that this is packed.
-    llvm::StructType *STy = llvm::StructType::get(Complex[0]->getType(),
-                                                  Complex[1]->getType(),
-                                                  nullptr);
+    llvm::StructType *STy =
+        llvm::StructType::get(Complex[0]->getType(), Complex[1]->getType());
     return llvm::ConstantStruct::get(STy, Complex);
   }
   case APValue::Float: {
@@ -1384,9 +1383,8 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value,
                                        Value.getComplexFloatImag());
 
     // FIXME: the target may want to specify that this is packed.
-    llvm::StructType *STy = llvm::StructType::get(Complex[0]->getType(),
-                                                  Complex[1]->getType(),
-                                                  nullptr);
+    llvm::StructType *STy =
+        llvm::StructType::get(Complex[0]->getType(), Complex[1]->getType());
     return llvm::ConstantStruct::get(STy, Complex);
   }
   case APValue::Vector: {
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 70b741651fd1..b8d830ee9f3f 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -89,14 +89,14 @@ struct BinOpInfo {
   }
 
   /// Check if the binop computes a division or a remainder.
-  bool isDivisionLikeOperation() const {
+  bool isDivremOp() const {
     return Opcode == BO_Div || Opcode == BO_Rem || Opcode == BO_DivAssign ||
            Opcode == BO_RemAssign;
   }
 
   /// Check if the binop can result in an integer division by zero.
   bool mayHaveIntegerDivisionByZero() const {
-    if (isDivisionLikeOperation())
+    if (isDivremOp())
       if (auto *CI = dyn_cast<llvm::ConstantInt>(RHS))
         return CI->isZero();
     return true;
@@ -104,7 +104,7 @@ struct BinOpInfo {
 
   /// Check if the binop can result in a float division by zero.
   bool mayHaveFloatDivisionByZero() const {
-    if (isDivisionLikeOperation())
+    if (isDivremOp())
       if (auto *CFP = dyn_cast<llvm::ConstantFP>(RHS))
         return CFP->isZero();
     return true;
@@ -2552,6 +2552,7 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) {
   if (isSigned)
     OpID |= 1;
 
+  CodeGenFunction::SanitizerScope SanScope(&CGF);
   llvm::Type *opTy = CGF.CGM.getTypes().ConvertType(Ops.Ty);
 
   llvm::Function *intrinsic = CGF.CGM.getIntrinsic(IID, opTy);
@@ -2567,7 +2568,6 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) {
     // If the signed-integer-overflow sanitizer is enabled, emit a call to its
     // runtime. Otherwise, this is a -ftrapv check, so just emit a trap.
     if (!isSigned || CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) {
-      CodeGenFunction::SanitizerScope SanScope(&CGF);
       llvm::Value *NotOverflow = Builder.CreateNot(overflow);
       SanitizerMask Kind = isSigned ? SanitizerKind::SignedIntegerOverflow
                               : SanitizerKind::UnsignedIntegerOverflow;
diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp
index 821629c50d4a..c8b8be7f4552 100644
--- a/lib/CodeGen/CGObjCGNU.cpp
+++ b/lib/CodeGen/CGObjCGNU.cpp
@@ -34,7 +34,6 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Compiler.h"
-#include <cstdarg>
 
 using namespace clang;
 using namespace CodeGen;
@@ -58,18 +57,19 @@ class LazyRuntimeFunction {
 
   /// Initialises the lazy function with the name, return type, and the types
   /// of the arguments.
-  LLVM_END_WITH_NULL
-  void init(CodeGenModule *Mod, const char *name, llvm::Type *RetTy, ...) {
+  template <typename... Tys>
+  void init(CodeGenModule *Mod, const char *name, llvm::Type *RetTy,
+            Tys *... Types) {
     CGM = Mod;
     FunctionName = name;
     Function = nullptr;
-    std::vector<llvm::Type *> ArgTys;
-    va_list Args;
-    va_start(Args, RetTy);
-    while (llvm::Type *ArgTy = va_arg(Args, llvm::Type *))
-      ArgTys.push_back(ArgTy);
-    va_end(Args);
-    FTy = llvm::FunctionType::get(RetTy, ArgTys, false);
+    if(sizeof...(Tys)) { // At least one argument type was supplied.
+      SmallVector<llvm::Type *, 8> ArgTys({Types...});
+      FTy = llvm::FunctionType::get(RetTy, ArgTys, false);
+    }
+    else { // No argument types: the function takes no parameters.
+      FTy = llvm::FunctionType::get(RetTy, None, false);
+    }
   }
 
   llvm::FunctionType *getType() { return FTy; }
@@ -603,11 +603,10 @@ class CGObjCGCC : public CGObjCGNU {
 public:
   CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) {
     // IMP objc_msg_lookup(id, SEL);
-    MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy,
-                     nullptr);
+    MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy);
     // IMP objc_msg_lookup_super(struct objc_super*, SEL);
     MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy,
-                          PtrToObjCSuperTy, SelectorTy, nullptr);
+                          PtrToObjCSuperTy, SelectorTy);
   }
 };
 
@@ -702,52 +701,51 @@ class CGObjCGNUstep : public CGObjCGNU {
     CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNU(Mod, 9, 3) {
       const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime;
 
-      llvm::StructType *SlotStructTy = llvm::StructType::get(PtrTy,
-          PtrTy, PtrTy, IntTy, IMPTy, nullptr);
+      llvm::StructType *SlotStructTy =
+          llvm::StructType::get(PtrTy, PtrTy, PtrTy, IntTy, IMPTy);
       SlotTy = llvm::PointerType::getUnqual(SlotStructTy);
       // Slot_t objc_msg_lookup_sender(id *receiver, SEL selector, id sender);
       SlotLookupFn.init(&CGM, "objc_msg_lookup_sender", SlotTy, PtrToIdTy,
-          SelectorTy, IdTy, nullptr);
+                        SelectorTy, IdTy);
       // Slot_t objc_msg_lookup_super(struct objc_super*, SEL);
       SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy,
-              PtrToObjCSuperTy, SelectorTy, nullptr);
+                             PtrToObjCSuperTy, SelectorTy);
       // If we're in ObjC++ mode, then we want to make 
       if (CGM.getLangOpts().CPlusPlus) {
         llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
         // void *__cxa_begin_catch(void *e)
-        EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy, nullptr);
+        EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy);
         // void __cxa_end_catch(void)
-        ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy, nullptr);
+        ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy);
         // void _Unwind_Resume_or_Rethrow(void*)
         ExceptionReThrowFn.init(&CGM, "_Unwind_Resume_or_Rethrow", VoidTy,
-            PtrTy, nullptr);
+                                PtrTy);
       } else if (R.getVersion() >= VersionTuple(1, 7)) {
         llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
         // id objc_begin_catch(void *e)
-        EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy, nullptr);
+        EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy);
         // void objc_end_catch(void)
-        ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy, nullptr);
+        ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy);
         // void _Unwind_Resume_or_Rethrow(void*)
-        ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy,
-            PtrTy, nullptr);
+        ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy, PtrTy);
       }
       llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
       SetPropertyAtomic.init(&CGM, "objc_setProperty_atomic", VoidTy, IdTy,
-          SelectorTy, IdTy, PtrDiffTy, nullptr);
+                             SelectorTy, IdTy, PtrDiffTy);
       SetPropertyAtomicCopy.init(&CGM, "objc_setProperty_atomic_copy", VoidTy,
-          IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr);
+                                 IdTy, SelectorTy, IdTy, PtrDiffTy);
       SetPropertyNonAtomic.init(&CGM, "objc_setProperty_nonatomic", VoidTy,
-          IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr);
+                                IdTy, SelectorTy, IdTy, PtrDiffTy);
       SetPropertyNonAtomicCopy.init(&CGM, "objc_setProperty_nonatomic_copy",
-          VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr);
+                                    VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy);
       // void objc_setCppObjectAtomic(void *dest, const void *src, void
       // *helper);
       CxxAtomicObjectSetFn.init(&CGM, "objc_setCppObjectAtomic", VoidTy, PtrTy,
-          PtrTy, PtrTy, nullptr);
+                                PtrTy, PtrTy);
       // void objc_getCppObjectAtomic(void *dest, const void *src, void
       // *helper);
       CxxAtomicObjectGetFn.init(&CGM, "objc_getCppObjectAtomic", VoidTy, PtrTy,
-          PtrTy, PtrTy, nullptr);
+                                PtrTy, PtrTy);
     }
 
     llvm::Constant *GetCppAtomicObjectGetFunction() override {
@@ -849,14 +847,14 @@ class CGObjCObjFW: public CGObjCGNU {
 public:
   CGObjCObjFW(CodeGenModule &Mod): CGObjCGNU(Mod, 9, 3) {
     // IMP objc_msg_lookup(id, SEL);
-    MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, nullptr);
+    MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy);
     MsgLookupFnSRet.init(&CGM, "objc_msg_lookup_stret", IMPTy, IdTy,
-                         SelectorTy, nullptr);
+                         SelectorTy);
     // IMP objc_msg_lookup_super(struct objc_super*, SEL);
     MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy,
-                          PtrToObjCSuperTy, SelectorTy, nullptr);
+                          PtrToObjCSuperTy, SelectorTy);
     MsgLookupSuperFnSRet.init(&CGM, "objc_msg_lookup_super_stret", IMPTy,
-                              PtrToObjCSuperTy, SelectorTy, nullptr);
+                              PtrToObjCSuperTy, SelectorTy);
   }
 };
 } // end anonymous namespace
@@ -945,35 +943,34 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
   }
   PtrToIdTy = llvm::PointerType::getUnqual(IdTy);
 
-  ObjCSuperTy = llvm::StructType::get(IdTy, IdTy, nullptr);
+  ObjCSuperTy = llvm::StructType::get(IdTy, IdTy);
   PtrToObjCSuperTy = llvm::PointerType::getUnqual(ObjCSuperTy);
 
   llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
 
   // void objc_exception_throw(id);
-  ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, nullptr);
-  ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, nullptr);
+  ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy);
+  ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy);
   // int objc_sync_enter(id);
-  SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy, nullptr);
+  SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy);
   // int objc_sync_exit(id);
-  SyncExitFn.init(&CGM, "objc_sync_exit", IntTy, IdTy, nullptr);
+  SyncExitFn.init(&CGM, "objc_sync_exit", IntTy, IdTy);
 
   // void objc_enumerationMutation (id)
-  EnumerationMutationFn.init(&CGM, "objc_enumerationMutation", VoidTy,
-      IdTy, nullptr);
+  EnumerationMutationFn.init(&CGM, "objc_enumerationMutation", VoidTy, IdTy);
 
   // id objc_getProperty(id, SEL, ptrdiff_t, BOOL)
   GetPropertyFn.init(&CGM, "objc_getProperty", IdTy, IdTy, SelectorTy,
-      PtrDiffTy, BoolTy, nullptr);
+                     PtrDiffTy, BoolTy);
   // void objc_setProperty(id, SEL, ptrdiff_t, id, BOOL, BOOL)
   SetPropertyFn.init(&CGM, "objc_setProperty", VoidTy, IdTy, SelectorTy,
-      PtrDiffTy, IdTy, BoolTy, BoolTy, nullptr);
+                     PtrDiffTy, IdTy, BoolTy, BoolTy);
   // void objc_setPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL)
-  GetStructPropertyFn.init(&CGM, "objc_getPropertyStruct", VoidTy, PtrTy, PtrTy, 
-      PtrDiffTy, BoolTy, BoolTy, nullptr);
+  GetStructPropertyFn.init(&CGM, "objc_getPropertyStruct", VoidTy, PtrTy, PtrTy,
+                           PtrDiffTy, BoolTy, BoolTy);
   // void objc_setPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL)
-  SetStructPropertyFn.init(&CGM, "objc_setPropertyStruct", VoidTy, PtrTy, PtrTy, 
-      PtrDiffTy, BoolTy, BoolTy, nullptr);
+  SetStructPropertyFn.init(&CGM, "objc_setPropertyStruct", VoidTy, PtrTy, PtrTy,
+                           PtrDiffTy, BoolTy, BoolTy);
 
   // IMP type
   llvm::Type *IMPArgs[] = { IdTy, SelectorTy };
@@ -997,21 +994,19 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
     // Get functions needed in GC mode
 
     // id objc_assign_ivar(id, id, ptrdiff_t);
-    IvarAssignFn.init(&CGM, "objc_assign_ivar", IdTy, IdTy, IdTy, PtrDiffTy,
-        nullptr);
+    IvarAssignFn.init(&CGM, "objc_assign_ivar", IdTy, IdTy, IdTy, PtrDiffTy);
     // id objc_assign_strongCast (id, id*)
     StrongCastAssignFn.init(&CGM, "objc_assign_strongCast", IdTy, IdTy,
-        PtrToIdTy, nullptr);
+                            PtrToIdTy);
     // id objc_assign_global(id, id*);
-    GlobalAssignFn.init(&CGM, "objc_assign_global", IdTy, IdTy, PtrToIdTy,
-        nullptr);
+    GlobalAssignFn.init(&CGM, "objc_assign_global", IdTy, IdTy, PtrToIdTy);
     // id objc_assign_weak(id, id*);
-    WeakAssignFn.init(&CGM, "objc_assign_weak", IdTy, IdTy, PtrToIdTy, nullptr);
+    WeakAssignFn.init(&CGM, "objc_assign_weak", IdTy, IdTy, PtrToIdTy);
     // id objc_read_weak(id*);
-    WeakReadFn.init(&CGM, "objc_read_weak", IdTy, PtrToIdTy, nullptr);
+    WeakReadFn.init(&CGM, "objc_read_weak", IdTy, PtrToIdTy);
     // void *objc_memmove_collectable(void*, void *, size_t);
     MemMoveFn.init(&CGM, "objc_memmove_collectable", PtrTy, PtrTy, PtrTy,
-        SizeTy, nullptr);
+                   SizeTy);
   }
 }
 
@@ -1317,7 +1312,7 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF,
     }
   }
   // Cast the pointer to a simplified version of the class structure
-  llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy, nullptr);
+  llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy);
   ReceiverClass = Builder.CreateBitCast(ReceiverClass,
                                         llvm::PointerType::getUnqual(CastTy));
   // Get the superclass pointer
@@ -1326,8 +1321,8 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF,
   ReceiverClass =
     Builder.CreateAlignedLoad(ReceiverClass, CGF.getPointerAlign());
   // Construct the structure used to look up the IMP
-  llvm::StructType *ObjCSuperTy = llvm::StructType::get(
-      Receiver->getType(), IdTy, nullptr);
+  llvm::StructType *ObjCSuperTy =
+      llvm::StructType::get(Receiver->getType(), IdTy);
 
   // FIXME: Is this really supposed to be a dynamic alloca?
   Address ObjCSuper = Address(Builder.CreateAlloca(ObjCSuperTy),
@@ -1565,11 +1560,8 @@ GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames,
   IvarList.addInt(IntTy, (int)IvarNames.size());
 
   // Get the ivar structure type.
-  llvm::StructType *ObjCIvarTy = llvm::StructType::get(
-    PtrToInt8Ty,
-    PtrToInt8Ty,
-    IntTy,
-    nullptr);
+  llvm::StructType *ObjCIvarTy =
+      llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, IntTy);
 
   // Array of ivar structures.
   auto Ivars = IvarList.beginArray(ObjCIvarTy);
@@ -1611,7 +1603,7 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure(
   // anyway; the classes will still work with the GNU runtime, they will just
   // be ignored.
   llvm::StructType *ClassTy = llvm::StructType::get(
-      PtrToInt8Ty,        // isa 
+      PtrToInt8Ty,        // isa
       PtrToInt8Ty,        // super_class
       PtrToInt8Ty,        // name
       LongTy,             // version
@@ -1620,18 +1612,18 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure(
       IVars->getType(),   // ivars
       Methods->getType(), // methods
       // These are all filled in by the runtime, so we pretend
-      PtrTy,              // dtable
-      PtrTy,              // subclass_list
-      PtrTy,              // sibling_class
-      PtrTy,              // protocols
-      PtrTy,              // gc_object_type
+      PtrTy, // dtable
+      PtrTy, // subclass_list
+      PtrTy, // sibling_class
+      PtrTy, // protocols
+      PtrTy, // gc_object_type
       // New ABI:
       LongTy,                 // abi_version
       IvarOffsets->getType(), // ivar_offsets
       Properties->getType(),  // properties
       IntPtrTy,               // strong_pointers
-      IntPtrTy,               // weak_pointers
-      nullptr);
+      IntPtrTy                // weak_pointers
+      );
 
   ConstantInitBuilder Builder(CGM);
   auto Elements = Builder.beginStruct(ClassTy);
diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp
index 70d24b791334..9c048423285b 100644
--- a/lib/CodeGen/CGObjCMac.cpp
+++ b/lib/CodeGen/CGObjCMac.cpp
@@ -105,8 +105,8 @@ class ObjCCommonTypesHelper {
   llvm::Constant *getMessageSendFp2retFn() const {
     llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy };
     llvm::Type *longDoubleType = llvm::Type::getX86_FP80Ty(VMContext);
-    llvm::Type *resultType = 
-      llvm::StructType::get(longDoubleType, longDoubleType, nullptr);
+    llvm::Type *resultType =
+        llvm::StructType::get(longDoubleType, longDoubleType);
 
     return CGM.CreateRuntimeFunction(llvm::FunctionType::get(resultType,
                                                              params, true),
@@ -5506,17 +5506,15 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm)
   //   char *name;
   //   char *attributes;
   // }
-  PropertyTy = llvm::StructType::create("struct._prop_t",
-                                        Int8PtrTy, Int8PtrTy, nullptr);
+  PropertyTy = llvm::StructType::create("struct._prop_t", Int8PtrTy, Int8PtrTy);
 
   // struct _prop_list_t {
   //   uint32_t entsize;      // sizeof(struct _prop_t)
   //   uint32_t count_of_properties;
   //   struct _prop_t prop_list[count_of_properties];
   // }
-  PropertyListTy =
-    llvm::StructType::create("struct._prop_list_t", IntTy, IntTy,
-                             llvm::ArrayType::get(PropertyTy, 0), nullptr);
+  PropertyListTy = llvm::StructType::create(
+      "struct._prop_list_t", IntTy, IntTy, llvm::ArrayType::get(PropertyTy, 0));
   // struct _prop_list_t *
   PropertyListPtrTy = llvm::PointerType::getUnqual(PropertyListTy);
 
@@ -5525,9 +5523,8 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm)
   //   char *method_type;
   //   char *_imp;
   // }
-  MethodTy = llvm::StructType::create("struct._objc_method",
-                                      SelectorPtrTy, Int8PtrTy, Int8PtrTy,
-                                      nullptr);
+  MethodTy = llvm::StructType::create("struct._objc_method", SelectorPtrTy,
+                                      Int8PtrTy, Int8PtrTy);
 
   // struct _objc_cache *
   CacheTy = llvm::StructType::create(VMContext, "struct._objc_cache");
@@ -5540,17 +5537,16 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
   //   SEL name;
   //   char *types;
   // }
-  MethodDescriptionTy =
-    llvm::StructType::create("struct._objc_method_description",
-                             SelectorPtrTy, Int8PtrTy, nullptr);
+  MethodDescriptionTy = llvm::StructType::create(
+      "struct._objc_method_description", SelectorPtrTy, Int8PtrTy);
 
   // struct _objc_method_description_list {
   //   int count;
   //   struct _objc_method_description[1];
   // }
-  MethodDescriptionListTy = llvm::StructType::create(
-      "struct._objc_method_description_list", IntTy,
-      llvm::ArrayType::get(MethodDescriptionTy, 0), nullptr);
+  MethodDescriptionListTy =
+      llvm::StructType::create("struct._objc_method_description_list", IntTy,
+                               llvm::ArrayType::get(MethodDescriptionTy, 0));
 
   // struct _objc_method_description_list *
   MethodDescriptionListPtrTy =
@@ -5566,11 +5562,10 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
   //   const char ** extendedMethodTypes;
   //   struct _objc_property_list *class_properties;
   // }
-  ProtocolExtensionTy =
-    llvm::StructType::create("struct._objc_protocol_extension",
-                             IntTy, MethodDescriptionListPtrTy,
-                             MethodDescriptionListPtrTy, PropertyListPtrTy,
-                             Int8PtrPtrTy, PropertyListPtrTy, nullptr);
+  ProtocolExtensionTy = llvm::StructType::create(
+      "struct._objc_protocol_extension", IntTy, MethodDescriptionListPtrTy,
+      MethodDescriptionListPtrTy, PropertyListPtrTy, Int8PtrPtrTy,
+      PropertyListPtrTy);
 
   // struct _objc_protocol_extension *
   ProtocolExtensionPtrTy = llvm::PointerType::getUnqual(ProtocolExtensionTy);
@@ -5582,10 +5577,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
 
   ProtocolListTy =
     llvm::StructType::create(VMContext, "struct._objc_protocol_list");
-  ProtocolListTy->setBody(llvm::PointerType::getUnqual(ProtocolListTy),
-                          LongTy,
-                          llvm::ArrayType::get(ProtocolTy, 0),
-                          nullptr);
+  ProtocolListTy->setBody(llvm::PointerType::getUnqual(ProtocolListTy), LongTy,
+                          llvm::ArrayType::get(ProtocolTy, 0));
 
   // struct _objc_protocol {
   //   struct _objc_protocol_extension *isa;
@@ -5596,9 +5589,7 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
   // }
   ProtocolTy->setBody(ProtocolExtensionPtrTy, Int8PtrTy,
                       llvm::PointerType::getUnqual(ProtocolListTy),
-                      MethodDescriptionListPtrTy,
-                      MethodDescriptionListPtrTy,
-                      nullptr);
+                      MethodDescriptionListPtrTy, MethodDescriptionListPtrTy);
 
   // struct _objc_protocol_list *
   ProtocolListPtrTy = llvm::PointerType::getUnqual(ProtocolListTy);
@@ -5612,8 +5603,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
   //   char *ivar_type;
   //   int  ivar_offset;
   // }
-  IvarTy = llvm::StructType::create("struct._objc_ivar",
-                                    Int8PtrTy, Int8PtrTy, IntTy, nullptr);
+  IvarTy = llvm::StructType::create("struct._objc_ivar", Int8PtrTy, Int8PtrTy,
+                                    IntTy);
 
   // struct _objc_ivar_list *
   IvarListTy =
@@ -5626,9 +5617,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
   MethodListPtrTy = llvm::PointerType::getUnqual(MethodListTy);
 
   // struct _objc_class_extension *
-  ClassExtensionTy =
-    llvm::StructType::create("struct._objc_class_extension",
-                             IntTy, Int8PtrTy, PropertyListPtrTy, nullptr);
+  ClassExtensionTy = llvm::StructType::create(
+      "struct._objc_class_extension", IntTy, Int8PtrTy, PropertyListPtrTy);
   ClassExtensionPtrTy = llvm::PointerType::getUnqual(ClassExtensionTy);
 
   ClassTy = llvm::StructType::create(VMContext, "struct._objc_class");
@@ -5648,18 +5638,9 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
   //   struct _objc_class_ext *ext;
   // };
   ClassTy->setBody(llvm::PointerType::getUnqual(ClassTy),
-                   llvm::PointerType::getUnqual(ClassTy),
-                   Int8PtrTy,
-                   LongTy,
-                   LongTy,
-                   LongTy,
-                   IvarListPtrTy,
-                   MethodListPtrTy,
-                   CachePtrTy,
-                   ProtocolListPtrTy,
-                   Int8PtrTy,
-                   ClassExtensionPtrTy,
-                   nullptr);
+                   llvm::PointerType::getUnqual(ClassTy), Int8PtrTy, LongTy,
+                   LongTy, LongTy, IvarListPtrTy, MethodListPtrTy, CachePtrTy,
+                   ProtocolListPtrTy, Int8PtrTy, ClassExtensionPtrTy);
 
   ClassPtrTy = llvm::PointerType::getUnqual(ClassTy);
 
@@ -5673,12 +5654,10 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
   //   struct _objc_property_list *instance_properties;// category's @property
   //   struct _objc_property_list *class_properties;
   // }
-  CategoryTy =
-    llvm::StructType::create("struct._objc_category",
-                             Int8PtrTy, Int8PtrTy, MethodListPtrTy,
-                             MethodListPtrTy, ProtocolListPtrTy,
-                             IntTy, PropertyListPtrTy, PropertyListPtrTy,
-                             nullptr);
+  CategoryTy = llvm::StructType::create(
+      "struct._objc_category", Int8PtrTy, Int8PtrTy, MethodListPtrTy,
+      MethodListPtrTy, ProtocolListPtrTy, IntTy, PropertyListPtrTy,
+      PropertyListPtrTy);
 
   // Global metadata structures
 
@@ -5689,10 +5668,9 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
   //   short cat_def_cnt;
   //   char *defs[cls_def_cnt + cat_def_cnt];
   // }
-  SymtabTy =
-    llvm::StructType::create("struct._objc_symtab",
-                             LongTy, SelectorPtrTy, ShortTy, ShortTy,
-                             llvm::ArrayType::get(Int8PtrTy, 0), nullptr);
+  SymtabTy = llvm::StructType::create("struct._objc_symtab", LongTy,
+                                      SelectorPtrTy, ShortTy, ShortTy,
+                                      llvm::ArrayType::get(Int8PtrTy, 0));
   SymtabPtrTy = llvm::PointerType::getUnqual(SymtabTy);
 
   // struct _objc_module {
@@ -5701,10 +5679,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
   //   char *name;
   //   struct _objc_symtab* symtab;
   //  }
-  ModuleTy =
-    llvm::StructType::create("struct._objc_module",
-                             LongTy, LongTy, Int8PtrTy, SymtabPtrTy, nullptr);
-
+  ModuleTy = llvm::StructType::create("struct._objc_module", LongTy, LongTy,
+                                      Int8PtrTy, SymtabPtrTy);
 
   // FIXME: This is the size of the setjmp buffer and should be target
   // specific. 18 is what's used on 32-bit X86.
@@ -5713,10 +5689,9 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
   // Exceptions
   llvm::Type *StackPtrTy = llvm::ArrayType::get(CGM.Int8PtrTy, 4);
 
-  ExceptionDataTy =
-    llvm::StructType::create("struct._objc_exception_data",
-                             llvm::ArrayType::get(CGM.Int32Ty,SetJmpBufferSize),
-                             StackPtrTy, nullptr);
+  ExceptionDataTy = llvm::StructType::create(
+      "struct._objc_exception_data",
+      llvm::ArrayType::get(CGM.Int32Ty, SetJmpBufferSize), StackPtrTy);
 }
 
 ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModule &cgm)
@@ -5727,8 +5702,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
   //   struct _objc_method method_list[method_count];
   // }
   MethodListnfABITy =
-    llvm::StructType::create("struct.__method_list_t", IntTy, IntTy,
-                             llvm::ArrayType::get(MethodTy, 0), nullptr);
+      llvm::StructType::create("struct.__method_list_t", IntTy, IntTy,
+                               llvm::ArrayType::get(MethodTy, 0));
   // struct method_list_t *
   MethodListnfABIPtrTy = llvm::PointerType::getUnqual(MethodListnfABITy);
 
@@ -5752,14 +5727,12 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
   ProtocolListnfABITy =
     llvm::StructType::create(VMContext, "struct._objc_protocol_list");
 
-  ProtocolnfABITy =
-    llvm::StructType::create("struct._protocol_t", ObjectPtrTy, Int8PtrTy,
-                             llvm::PointerType::getUnqual(ProtocolListnfABITy),
-                             MethodListnfABIPtrTy, MethodListnfABIPtrTy,
-                             MethodListnfABIPtrTy, MethodListnfABIPtrTy,
-                             PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy,
-                             Int8PtrTy, PropertyListPtrTy,
-                             nullptr);
+  ProtocolnfABITy = llvm::StructType::create(
+      "struct._protocol_t", ObjectPtrTy, Int8PtrTy,
+      llvm::PointerType::getUnqual(ProtocolListnfABITy), MethodListnfABIPtrTy,
+      MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy,
+      PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy, Int8PtrTy,
+      PropertyListPtrTy);
 
   // struct _protocol_t*
   ProtocolnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolnfABITy);
@@ -5769,8 +5742,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
   //   struct _protocol_t *[protocol_count];
   // }
   ProtocolListnfABITy->setBody(LongTy,
-                               llvm::ArrayType::get(ProtocolnfABIPtrTy, 0),
-                               nullptr);
+                               llvm::ArrayType::get(ProtocolnfABIPtrTy, 0));
 
   // struct _objc_protocol_list*
   ProtocolListnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolListnfABITy);
@@ -5784,7 +5756,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
   // }
   IvarnfABITy = llvm::StructType::create(
       "struct._ivar_t", llvm::PointerType::getUnqual(IvarOffsetVarTy),
-      Int8PtrTy, Int8PtrTy, IntTy, IntTy, nullptr);
+      Int8PtrTy, Int8PtrTy, IntTy, IntTy);
 
   // struct _ivar_list_t {
   //   uint32 entsize;  // sizeof(struct _ivar_t)
@@ -5792,8 +5764,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
   //   struct _iver_t list[count];
   // }
   IvarListnfABITy =
-    llvm::StructType::create("struct._ivar_list_t", IntTy, IntTy,
-                             llvm::ArrayType::get(IvarnfABITy, 0), nullptr);
+      llvm::StructType::create("struct._ivar_list_t", IntTy, IntTy,
+                               llvm::ArrayType::get(IvarnfABITy, 0));
 
   IvarListnfABIPtrTy = llvm::PointerType::getUnqual(IvarListnfABITy);
 
@@ -5812,13 +5784,10 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
   // }
 
   // FIXME. Add 'reserved' field in 64bit abi mode!
-  ClassRonfABITy = llvm::StructType::create("struct._class_ro_t",
-                                            IntTy, IntTy, IntTy, Int8PtrTy,
-                                            Int8PtrTy, MethodListnfABIPtrTy,
-                                            ProtocolListnfABIPtrTy,
-                                            IvarListnfABIPtrTy,
-                                            Int8PtrTy, PropertyListPtrTy,
-                                            nullptr);
+  ClassRonfABITy = llvm::StructType::create(
+      "struct._class_ro_t", IntTy, IntTy, IntTy, Int8PtrTy, Int8PtrTy,
+      MethodListnfABIPtrTy, ProtocolListnfABIPtrTy, IvarListnfABIPtrTy,
+      Int8PtrTy, PropertyListPtrTy);
 
   // ImpnfABITy - LLVM for id (*)(id, SEL, ...)
   llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy };
@@ -5835,11 +5804,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
 
   ClassnfABITy = llvm::StructType::create(VMContext, "struct._class_t");
   ClassnfABITy->setBody(llvm::PointerType::getUnqual(ClassnfABITy),
-                        llvm::PointerType::getUnqual(ClassnfABITy),
-                        CachePtrTy,
+                        llvm::PointerType::getUnqual(ClassnfABITy), CachePtrTy,
                         llvm::PointerType::getUnqual(ImpnfABITy),
-                        llvm::PointerType::getUnqual(ClassRonfABITy),
-                        nullptr);
+                        llvm::PointerType::getUnqual(ClassRonfABITy));
 
   // LLVM for struct _class_t *
   ClassnfABIPtrTy = llvm::PointerType::getUnqual(ClassnfABITy);
@@ -5854,15 +5821,10 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
   //   const struct _prop_list_t * const class_properties;
   //   const uint32_t size;
   // }
-  CategorynfABITy = llvm::StructType::create("struct._category_t",
-                                             Int8PtrTy, ClassnfABIPtrTy,
-                                             MethodListnfABIPtrTy,
-                                             MethodListnfABIPtrTy,
-                                             ProtocolListnfABIPtrTy,
-                                             PropertyListPtrTy,
-                                             PropertyListPtrTy,
-                                             IntTy,
-                                             nullptr);
+  CategorynfABITy = llvm::StructType::create(
+      "struct._category_t", Int8PtrTy, ClassnfABIPtrTy, MethodListnfABIPtrTy,
+      MethodListnfABIPtrTy, ProtocolListnfABIPtrTy, PropertyListPtrTy,
+      PropertyListPtrTy, IntTy);
 
   // New types for nonfragile abi messaging.
   CodeGen::CodeGenTypes &Types = CGM.getTypes();
@@ -5899,9 +5861,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
   //   SUPER_IMP messenger;
   //   SEL name;
   // };
-  SuperMessageRefTy =
-    llvm::StructType::create("struct._super_message_ref_t",
-                             ImpnfABITy, SelectorPtrTy, nullptr);
+  SuperMessageRefTy = llvm::StructType::create("struct._super_message_ref_t",
+                                               ImpnfABITy, SelectorPtrTy);
 
   // SuperMessageRefPtrTy - LLVM for struct _super_message_ref_t*
   SuperMessageRefPtrTy = llvm::PointerType::getUnqual(SuperMessageRefTy);
@@ -5912,10 +5873,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
   //   const char*  name;    // c++ typeinfo string
   //   Class        cls;
   // };
-  EHTypeTy =
-    llvm::StructType::create("struct._objc_typeinfo",
-                             llvm::PointerType::getUnqual(Int8PtrTy),
-                             Int8PtrTy, ClassnfABIPtrTy, nullptr);
+  EHTypeTy = llvm::StructType::create("struct._objc_typeinfo",
+                                      llvm::PointerType::getUnqual(Int8PtrTy),
+                                      Int8PtrTy, ClassnfABIPtrTy);
   EHTypePtrTy = llvm::PointerType::getUnqual(EHTypeTy);
 }
 
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index d1a706b8821e..b256a88c47ad 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -728,7 +728,7 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
   IdentTy = llvm::StructType::create(
       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
-      CGM.Int8PtrTy /* psource */, nullptr);
+      CGM.Int8PtrTy /* psource */);
   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
 
   loadOffloadInfoMetadata();
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index f57cbe86c413..c7e30fad7575 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -548,9 +548,9 @@ void BackendConsumer::UnsupportedDiagHandler(
 
   StringRef Filename;
   unsigned Line, Column;
-  bool BadDebugInfo;
-  FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename,
-      Line, Column);
+  bool BadDebugInfo = false;
+  FullSourceLoc Loc =
+      getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column);
 
   Diags.Report(Loc, diag::err_fe_backend_unsupported) << D.getMessage().str();
 
@@ -572,8 +572,8 @@ void BackendConsumer::EmitOptimizationMessage(
   StringRef Filename;
   unsigned Line, Column;
   bool BadDebugInfo = false;
-  FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename,
-      Line, Column);
+  FullSourceLoc Loc =
+      getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column);
 
   std::string Msg;
   raw_string_ostream MsgStream(Msg);
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index ff26d80fe2b6..a0254797ea43 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -751,7 +751,7 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) {
 
   // Get the type of a ctor entry, { i32, void ()*, i8* }.
   llvm::StructType *CtorStructTy = llvm::StructType::get(
-      Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy, nullptr);
+      Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy);
 
   // Construct the constructor and destructor arrays.
   ConstantInitBuilder builder(*this);
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index dc24b2040f04..5ed929135880 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -490,7 +490,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
     llvm_unreachable("Unexpected undeduced type!");
   case Type::Complex: {
     llvm::Type *EltTy = ConvertType(cast<ComplexType>(Ty)->getElementType());
-    ResultType = llvm::StructType::get(EltTy, EltTy, nullptr);
+    ResultType = llvm::StructType::get(EltTy, EltTy);
     break;
   }
   case Type::LValueReference:
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index dac2d15fa406..66f51305430a 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -499,7 +499,7 @@ llvm::Type *
 ItaniumCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) {
   if (MPT->isMemberDataPointer())
     return CGM.PtrDiffTy;
-  return llvm::StructType::get(CGM.PtrDiffTy, CGM.PtrDiffTy, nullptr);
+  return llvm::StructType::get(CGM.PtrDiffTy, CGM.PtrDiffTy);
 }
 
 /// In the Itanium and ARM ABIs, method pointers have the form:
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 4ebbef7dfb5b..18367d1602ba 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -3159,8 +3159,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
     }
   }
 
-  llvm::StructType *Result = llvm::StructType::get(Lo, Hi, nullptr);
-
+  llvm::StructType *Result = llvm::StructType::get(Lo, Hi);
 
   // Verify that the second element is at an 8-byte offset.
   assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 &&
@@ -3235,8 +3234,7 @@ classifyReturnType(QualType RetTy) const {
   case ComplexX87:
     assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification.");
     ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()),
-                                    llvm::Type::getX86_FP80Ty(getVMContext()),
-                                    nullptr);
+                                    llvm::Type::getX86_FP80Ty(getVMContext()));
     break;
   }
 
@@ -3732,7 +3730,7 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
       CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
                                              CharUnits::fromQuantity(16));
     llvm::Type *DoubleTy = CGF.DoubleTy;
-    llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy, nullptr);
+    llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy);
     llvm::Value *V;
     Address Tmp = CGF.CreateMemTemp(Ty);
     Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
@@ -4637,7 +4635,7 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
       llvm::Type *CoerceTy;
       if (Bits > GPRBits) {
         CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits);
-        CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr);
+        CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy);
       } else
         CoerceTy =
             llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
@@ -6695,6 +6693,14 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const {
       return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
     }
 
+    // Pass the aggregate indirectly when it is larger than the ABI's argument
+    // registers can hold: over 16 bytes for O32, over 64 bytes otherwise.
+    unsigned Threshold = IsO32 ? 16 : 64;
+
+    if(getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(Threshold))
+      return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align), true,
+                                     getContext().getTypeAlign(Ty) / 8 > Align);
+
     // If we have reached here, aggregates are passed directly by coercing to
     // another structure type. Padding is inserted if the offset of the
     // aggregate is unaligned.
@@ -7037,13 +7043,13 @@ ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const {
             ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
   }
 
+  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
+    return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+
   // Ignore empty records.
   if (isEmptyRecord(getContext(), Ty, true))
     return ABIArgInfo::getIgnore();
 
-  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
-    return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
-
   uint64_t Size = getContext().getTypeSize(Ty);
   if (Size > 64)
     return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
diff --git a/lib/Driver/SanitizerArgs.cpp b/lib/Driver/SanitizerArgs.cpp
index c298302c477c..9ab2e176845c 100644
--- a/lib/Driver/SanitizerArgs.cpp
+++ b/lib/Driver/SanitizerArgs.cpp
@@ -563,12 +563,18 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
       }
     }
 
-    if (Arg *A = Args.getLastArg(
-            options::OPT_fsanitize_address_use_after_scope,
-            options::OPT_fno_sanitize_address_use_after_scope)) {
-      AsanUseAfterScope = A->getOption().getID() ==
-                          options::OPT_fsanitize_address_use_after_scope;
-    }
+    AsanUseAfterScope = Args.hasFlag(
+        options::OPT_fsanitize_address_use_after_scope,
+        options::OPT_fno_sanitize_address_use_after_scope, AsanUseAfterScope);
+
+    // As a workaround for a bug in gold 2.26 and earlier, dead stripping of
+    // globals in ASan is disabled by default on ELF targets.
+    // See https://sourceware.org/bugzilla/show_bug.cgi?id=19002
+    AsanGlobalsDeadStripping =
+        !TC.getTriple().isOSBinFormatELF() ||
+        Args.hasArg(options::OPT_fsanitize_address_globals_dead_stripping);
+  } else {
+    AsanUseAfterScope = false;
   }
 
   // Parse -link-cxx-sanitizer flag.
@@ -634,7 +640,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
     std::make_pair(CoverageNoPrune, "-fsanitize-coverage-no-prune")};
   for (auto F : CoverageFlags) {
     if (CoverageFeatures & F.first)
-      CmdArgs.push_back(Args.MakeArgString(F.second));
+      CmdArgs.push_back(F.second);
   }
 
   if (TC.getTriple().isOSWindows() && needsUbsanRt()) {
@@ -687,7 +693,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
                                          llvm::utostr(MsanTrackOrigins)));
 
   if (MsanUseAfterDtor)
-    CmdArgs.push_back(Args.MakeArgString("-fsanitize-memory-use-after-dtor"));
+    CmdArgs.push_back("-fsanitize-memory-use-after-dtor");
 
   // FIXME: Pass these parameters as function attributes, not as -llvm flags.
   if (!TsanMemoryAccess) {
@@ -706,17 +712,20 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
   }
 
   if (CfiCrossDso)
-    CmdArgs.push_back(Args.MakeArgString("-fsanitize-cfi-cross-dso"));
+    CmdArgs.push_back("-fsanitize-cfi-cross-dso");
 
   if (Stats)
-    CmdArgs.push_back(Args.MakeArgString("-fsanitize-stats"));
+    CmdArgs.push_back("-fsanitize-stats");
 
   if (AsanFieldPadding)
     CmdArgs.push_back(Args.MakeArgString("-fsanitize-address-field-padding=" +
                                          llvm::utostr(AsanFieldPadding)));
 
   if (AsanUseAfterScope)
-    CmdArgs.push_back(Args.MakeArgString("-fsanitize-address-use-after-scope"));
+    CmdArgs.push_back("-fsanitize-address-use-after-scope");
+
+  if (AsanGlobalsDeadStripping)
+    CmdArgs.push_back("-fsanitize-address-globals-dead-stripping");
 
   // MSan: Workaround for PR16386.
   // ASan: This is mainly to help LSan with cases such as
@@ -724,7 +733,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
   // We can't make this conditional on -fsanitize=leak, as that flag shouldn't
   // affect compilation.
   if (Sanitizers.has(Memory) || Sanitizers.has(Address))
-    CmdArgs.push_back(Args.MakeArgString("-fno-assume-sane-operator-new"));
+    CmdArgs.push_back("-fno-assume-sane-operator-new");
 
   // Require -fvisibility= flag on non-Windows when compiling if vptr CFI is
   // enabled.
diff --git a/lib/Driver/ToolChains/Arch/Mips.cpp b/lib/Driver/ToolChains/Arch/Mips.cpp
index cd791af83220..f33542477fb5 100644
--- a/lib/Driver/ToolChains/Arch/Mips.cpp
+++ b/lib/Driver/ToolChains/Arch/Mips.cpp
@@ -282,18 +282,18 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
   if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx,
                                options::OPT_mfp64)) {
     if (A->getOption().matches(options::OPT_mfp32))
-      Features.push_back(Args.MakeArgString("-fp64"));
+      Features.push_back("-fp64");
     else if (A->getOption().matches(options::OPT_mfpxx)) {
-      Features.push_back(Args.MakeArgString("+fpxx"));
-      Features.push_back(Args.MakeArgString("+nooddspreg"));
+      Features.push_back("+fpxx");
+      Features.push_back("+nooddspreg");
     } else
-      Features.push_back(Args.MakeArgString("+fp64"));
+      Features.push_back("+fp64");
   } else if (mips::shouldUseFPXX(Args, Triple, CPUName, ABIName, FloatABI)) {
-    Features.push_back(Args.MakeArgString("+fpxx"));
-    Features.push_back(Args.MakeArgString("+nooddspreg"));
+    Features.push_back("+fpxx");
+    Features.push_back("+nooddspreg");
   } else if (mips::isFP64ADefault(Triple, CPUName)) {
-    Features.push_back(Args.MakeArgString("+fp64"));
-    Features.push_back(Args.MakeArgString("+nooddspreg"));
+    Features.push_back("+fp64");
+    Features.push_back("+nooddspreg");
   }
 
   AddTargetFeature(Args, Features, options::OPT_mno_odd_spreg,
diff --git a/lib/Driver/ToolChains/Myriad.cpp b/lib/Driver/ToolChains/Myriad.cpp
index 2935755c12be..f70ce93c45ce 100644
--- a/lib/Driver/ToolChains/Myriad.cpp
+++ b/lib/Driver/ToolChains/Myriad.cpp
@@ -43,15 +43,17 @@ void tools::SHAVE::Compiler::ConstructJob(Compilation &C, const JobAction &JA,
   }
   CmdArgs.push_back("-DMYRIAD2");
 
-  // Append all -I, -iquote, -isystem paths, defines/undefines,
-  // 'f' flags, optimize flags, and warning options.
+  // Append all -I, -iquote, -isystem paths, defines/undefines, 'f'
+  // flags, 'g' flags, 'M' flags, optimize flags, warning options,
+  // mcpu flags, mllvm flags, and Xclang flags.
   // These are spelled the same way in clang and moviCompile.
   Args.AddAllArgsExcept(
       CmdArgs,
       {options::OPT_I_Group, options::OPT_clang_i_Group, options::OPT_std_EQ,
        options::OPT_D, options::OPT_U, options::OPT_f_Group,
        options::OPT_f_clang_Group, options::OPT_g_Group, options::OPT_M_Group,
-       options::OPT_O_Group, options::OPT_W_Group, options::OPT_mcpu_EQ},
+       options::OPT_O_Group, options::OPT_W_Group, options::OPT_mcpu_EQ,
+       options::OPT_mllvm, options::OPT_Xclang},
       {options::OPT_fno_split_dwarf_inlining});
   Args.hasArg(options::OPT_fno_split_dwarf_inlining); // Claim it if present.
 
diff --git a/lib/Driver/ToolChains/WebAssembly.cpp b/lib/Driver/ToolChains/WebAssembly.cpp
index 123a1516f1e7..3471569b6884 100644
--- a/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/lib/Driver/ToolChains/WebAssembly.cpp
@@ -42,7 +42,7 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   const char *Linker = Args.MakeArgString(ToolChain.GetLinkerPath());
   ArgStringList CmdArgs;
   CmdArgs.push_back("-flavor");
-  CmdArgs.push_back("ld");
+  CmdArgs.push_back("wasm");
 
   // Enable garbage collection of unused input sections by default, since code
   // size is of particular importance. This is significantly facilitated by
@@ -101,6 +101,9 @@ WebAssembly::WebAssembly(const Driver &D, const llvm::Triple &Triple,
   : ToolChain(D, Triple, Args) {
 
   assert(Triple.isArch32Bit() != Triple.isArch64Bit());
+
+  getProgramPaths().push_back(getDriver().getInstalledDir());
+
   getFilePaths().push_back(
       getDriver().SysRoot + "/lib" + (Triple.isArch32Bit() ? "32" : "64"));
 }
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
index 3adb72c11da8..488f9dd582f9 100644
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -674,6 +674,8 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
       return State.Stack[State.Stack.size() - 2].LastSpace;
     return State.FirstIndent;
   }
+  if (Current.is(tok::r_paren) && State.Stack.size() > 1)
+    return State.Stack[State.Stack.size() - 2].LastSpace;
   if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope())
     return State.Stack[State.Stack.size() - 2].LastSpace;
   if (Current.is(tok::identifier) && Current.Next &&
@@ -920,6 +922,10 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
     NewParenState.NoLineBreak =
         NewParenState.NoLineBreak || State.Stack.back().NoLineBreakInOperand;
 
+    // Don't propagate AvoidBinPacking into subexpressions of arg/param lists.
+    if (*I > prec::Comma)
+      NewParenState.AvoidBinPacking = false;
+
     // Indent from 'LastSpace' unless these are fake parentheses encapsulating
     // a builder type call after 'return' or, if the alignment after opening
     // brackets is disabled.
@@ -1034,13 +1040,20 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
       NestedBlockIndent = Column;
     }
 
+    bool EndsInComma =
+        Current.MatchingParen &&
+        Current.MatchingParen->getPreviousNonComment() &&
+        Current.MatchingParen->getPreviousNonComment()->is(tok::comma);
+
     AvoidBinPacking =
+        (Style.Language == FormatStyle::LK_JavaScript && EndsInComma) ||
         (State.Line->MustBeDeclaration && !Style.BinPackParameters) ||
         (!State.Line->MustBeDeclaration && !Style.BinPackArguments) ||
         (Style.ExperimentalAutoDetectBinPacking &&
          (Current.PackingKind == PPK_OnePerLine ||
           (!BinPackInconclusiveFunctions &&
            Current.PackingKind == PPK_Inconclusive)));
+
     if (Current.is(TT_ObjCMethodExpr) && Current.MatchingParen) {
       if (Style.ColumnLimit) {
         // If this '[' opens an ObjC call, determine whether all parameters fit
@@ -1061,6 +1074,9 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
         }
       }
     }
+
+    if (Style.Language == FormatStyle::LK_JavaScript && EndsInComma)
+      BreakBeforeParameter = true;
   }
   // Generally inherit NoLineBreak from the current scope to nested scope.
   // However, don't do this for non-empty nested blocks, dict literals and
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index f55a623a8d1f..c8677e805179 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -171,6 +171,18 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> {
   }
 };
 
+template <> struct ScalarEnumerationTraits<FormatStyle::EscapedNewlineAlignmentStyle> {
+  static void enumeration(IO &IO, FormatStyle::EscapedNewlineAlignmentStyle &Value) {
+    IO.enumCase(Value, "DontAlign", FormatStyle::ENAS_DontAlign);
+    IO.enumCase(Value, "Left", FormatStyle::ENAS_Left);
+    IO.enumCase(Value, "Right", FormatStyle::ENAS_Right);
+
+    // For backward compatibility.
+    IO.enumCase(Value, "true", FormatStyle::ENAS_Left);
+    IO.enumCase(Value, "false", FormatStyle::ENAS_Right);
+  }
+};
+
 template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
   static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) {
     IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
@@ -233,6 +245,7 @@ template <> struct MappingTraits<FormatStyle> {
 
     // For backward compatibility.
     if (!IO.outputting()) {
+      IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlines);
       IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
       IO.mapOptional("IndentFunctionDeclarationAfterType",
                      Style.IndentWrappedFunctionNames);
@@ -247,7 +260,7 @@ template <> struct MappingTraits<FormatStyle> {
                    Style.AlignConsecutiveAssignments);
     IO.mapOptional("AlignConsecutiveDeclarations",
                    Style.AlignConsecutiveDeclarations);
-    IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
+    IO.mapOptional("AlignEscapedNewlines", Style.AlignEscapedNewlines);
     IO.mapOptional("AlignOperands", Style.AlignOperands);
     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
@@ -498,7 +511,7 @@ FormatStyle getLLVMStyle() {
   FormatStyle LLVMStyle;
   LLVMStyle.Language = FormatStyle::LK_Cpp;
   LLVMStyle.AccessModifierOffset = -2;
-  LLVMStyle.AlignEscapedNewlinesLeft = false;
+  LLVMStyle.AlignEscapedNewlines = FormatStyle::ENAS_Right;
   LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align;
   LLVMStyle.AlignOperands = true;
   LLVMStyle.AlignTrailingComments = true;
@@ -587,7 +600,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
   GoogleStyle.Language = Language;
 
   GoogleStyle.AccessModifierOffset = -1;
-  GoogleStyle.AlignEscapedNewlinesLeft = true;
+  GoogleStyle.AlignEscapedNewlines = FormatStyle::ENAS_Left;
   GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
   GoogleStyle.AllowShortLoopsOnASingleLine = true;
   GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
@@ -624,9 +637,10 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
     GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
     GoogleStyle.BreakBeforeTernaryOperators = false;
-    // taze:, @tag followed by { for a lot of JSDoc tags, and @see, which is
-    // commonly followed by overlong URLs.
-    GoogleStyle.CommentPragmas = "(taze:|(@[A-Za-z_0-9-]+[ \\t]*{)|@see)";
+    // taze:, triple slash directives (`/// <...`), @tag followed by { for a lot
+    // of JSDoc tags, and @see, which is commonly followed by overlong URLs.
+    GoogleStyle.CommentPragmas =
+        "(taze:|^/[ \t]*<|(@[A-Za-z_0-9-]+[ \\t]*{)|@see)";
     GoogleStyle.MaxEmptyLinesToKeep = 3;
     GoogleStyle.NamespaceIndentation = FormatStyle::NI_All;
     GoogleStyle.SpacesInContainerLiterals = false;
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index c274d7bf07f8..387768a6ee56 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -576,9 +576,15 @@ class AnnotatingParser {
       }
       break;
     case tok::kw_for:
-      if (Style.Language == FormatStyle::LK_JavaScript && Tok->Previous &&
-          Tok->Previous->is(tok::period))
-        break;
+      if (Style.Language == FormatStyle::LK_JavaScript) {
+        // A 'for' that directly follows a period is skipped here (x.for).
+        if (Tok->Previous && Tok->Previous->is(tok::period))
+          break;
+        // JS' for async ( ...
+        // Guard against a null CurrentToken before dereferencing it.
+        if (CurrentToken && CurrentToken->is(Keywords.kw_async))
+          next();
+      }
       Contexts.back().ColonIsForRangeExpr = true;
       next();
       if (!parseParens())
@@ -1034,8 +1037,9 @@ class AnnotatingParser {
     if (Style.Language == FormatStyle::LK_JavaScript) {
       if (Current.is(tok::exclaim)) {
         if (Current.Previous &&
-            (Current.Previous->isOneOf(tok::identifier, tok::r_paren,
-                                       tok::r_square, tok::r_brace) ||
+            (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace,
+                                       tok::r_paren, tok::r_square,
+                                       tok::r_brace) ||
              Current.Previous->Tok.isLiteral())) {
           Current.Type = TT_JsNonNullAssertion;
           return;
@@ -2248,6 +2252,10 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
   } else if (Style.Language == FormatStyle::LK_JavaScript) {
     if (Left.is(TT_JsFatArrow))
       return true;
+    // for async ( ...
+    if (Right.is(tok::l_paren) && Left.is(Keywords.kw_async) &&
+        Left.Previous && Left.Previous->is(tok::kw_for))
+      return true;
     if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
         Right.MatchingParen) {
       const FormatToken *Next = Right.MatchingParen->getNextNonComment();
@@ -2462,16 +2470,20 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
       return true;
   }
 
-  // If the last token before a '}' is a comma or a trailing comment, the
-  // intention is to insert a line break after it in order to make shuffling
-  // around entries easier.
+  // If the last token before a '}', ']', or ')' is a comma or a trailing
+  // comment, the intention is to insert a line break after it in order to make
+  // shuffling around entries easier.
   const FormatToken *BeforeClosingBrace = nullptr;
-  if (Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) &&
+  if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
+       (Style.Language == FormatStyle::LK_JavaScript &&
+        Left.is(tok::l_paren))) &&
       Left.BlockKind != BK_Block && Left.MatchingParen)
     BeforeClosingBrace = Left.MatchingParen->Previous;
   else if (Right.MatchingParen &&
-           Right.MatchingParen->isOneOf(tok::l_brace,
-                                        TT_ArrayInitializerLSquare))
+           (Right.MatchingParen->isOneOf(tok::l_brace,
+                                         TT_ArrayInitializerLSquare) ||
+            (Style.Language == FormatStyle::LK_JavaScript &&
+             Right.MatchingParen->is(tok::l_paren))))
     BeforeClosingBrace = &Left;
   if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
                              BeforeClosingBrace->isTrailingComment()))
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index 2d788b52dfda..31c66ffb00a1 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -368,9 +368,10 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
               (Style.Language == FormatStyle::LK_JavaScript &&
                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                 Keywords.kw_as)) ||
+              (Style.isCpp() && NextTok->is(tok::l_paren)) ||
               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                                tok::r_paren, tok::r_square, tok::l_brace,
-                               tok::l_square, tok::l_paren, tok::ellipsis) ||
+                               tok::l_square, tok::ellipsis) ||
               (NextTok->is(tok::identifier) &&
                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
               (NextTok->is(tok::semi) &&
@@ -476,6 +477,24 @@ static bool isGoogScope(const UnwrappedLine &Line) {
   return I->Tok->is(tok::l_paren);
 }
 
+static bool isIIFE(const UnwrappedLine &Line,
+                   const AdditionalKeywords &Keywords) {
+  // Recognize the opening of an immediately invoked function expression:
+  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
+  // JavaScript code commonly uses one to open a fresh, anonymous scope.
+  // Example: (function() { ... })()
+  if (Line.Tokens.size() < 3)
+    return false;
+  // With at least three tokens present, check that the line starts with
+  // '(' 'function' '('.
+  auto It = Line.Tokens.begin();
+  const auto *First = It->Tok;
+  const auto *Second = (++It)->Tok;
+  const auto *Third = (++It)->Tok;
+  return First->is(tok::l_paren) && Second->is(Keywords.kw_function) &&
+         Third->is(tok::l_paren);
+}
+
 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
                                    const FormatToken &InitialToken) {
   if (InitialToken.is(tok::kw_namespace))
@@ -493,15 +512,16 @@ void UnwrappedLineParser::parseChildBlock() {
   FormatTok->BlockKind = BK_Block;
   nextToken();
   {
-    bool GoogScope =
-        Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
+    bool SkipIndent =
+        (Style.Language == FormatStyle::LK_JavaScript &&
+         (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
     ScopedLineState LineState(*this);
     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                             /*MustBeDeclaration=*/false);
-    Line->Level += GoogScope ? 0 : 1;
+    Line->Level += SkipIndent ? 0 : 1;
     parseLevel(/*HasOpeningBrace=*/true);
     flushComments(isOnNewLine(*FormatTok));
-    Line->Level -= GoogScope ? 0 : 1;
+    Line->Level -= SkipIndent ? 0 : 1;
   }
   nextToken();
 }
@@ -1615,6 +1635,10 @@ void UnwrappedLineParser::parseForOrWhileLoop() {
   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
          "'for', 'while' or foreach macro expected");
   nextToken();
+  // JS' for async ( ...
+  if (Style.Language == FormatStyle::LK_JavaScript &&
+      FormatTok->is(Keywords.kw_async))
+    nextToken();
   if (FormatTok->Tok.is(tok::l_paren))
     parseParens();
   if (FormatTok->Tok.is(tok::l_brace)) {
diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp
index 2c1f59324971..3b6311d15487 100644
--- a/lib/Format/WhitespaceManager.cpp
+++ b/lib/Format/WhitespaceManager.cpp
@@ -517,8 +517,11 @@ void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
 }
 
 void WhitespaceManager::alignEscapedNewlines() {
-  unsigned MaxEndOfLine =
-      Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
+  if (Style.AlignEscapedNewlines == FormatStyle::ENAS_DontAlign)
+    return;
+
+  bool AlignLeft = Style.AlignEscapedNewlines == FormatStyle::ENAS_Left;
+  unsigned MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit;
   unsigned StartOfMacro = 0;
   for (unsigned i = 1, e = Changes.size(); i < e; ++i) {
     Change &C = Changes[i];
@@ -527,7 +530,7 @@ void WhitespaceManager::alignEscapedNewlines() {
         MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine);
       } else {
         alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine);
-        MaxEndOfLine = Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
+        MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit;
         StartOfMacro = i;
       }
     }
@@ -602,7 +605,7 @@ void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines,
                                           unsigned EscapedNewlineColumn) {
   if (Newlines > 0) {
     unsigned Offset =
-        std::min<int>(EscapedNewlineColumn - 1, PreviousEndOfTokenColumn);
+        std::min<int>(EscapedNewlineColumn - 2, PreviousEndOfTokenColumn);
     for (unsigned i = 0; i < Newlines; ++i) {
       Text.append(EscapedNewlineColumn - Offset - 1, ' ');
       Text.append(UseCRLF ? "\\\r\n" : "\\\n");
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index 96854b8fbc1a..51147b6f9499 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -778,6 +778,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
     Opts.SanitizeAddressUseAfterScope =
         A->getOption().getID() == OPT_fsanitize_address_use_after_scope;
   }
+  Opts.SanitizeAddressGlobalsDeadStripping =
+      Args.hasArg(OPT_fsanitize_address_globals_dead_stripping);
   Opts.SSPBufferSize =
       getLastArgIntValue(Args, OPT_stack_protector_buffer_size, 8, Diags);
   Opts.StackRealignment = Args.hasArg(OPT_mstackrealign);
diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h
index cdb7aa4fb626..15d28259dcc0 100644
--- a/lib/Headers/avxintrin.h
+++ b/lib/Headers/avxintrin.h
@@ -1458,12 +1458,13 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 /// \brief Computes two dot products in parallel, using the lower and upper
 ///    halves of two [8 x float] vectors as input to the two computations, and
 ///    returning the two dot products in the lower and upper halves of the
-///    [8 x float] result. The immediate integer operand controls which input
-///    elements will contribute to the dot product, and where the final results
-///    are returned. In general, for each dot product, the four corresponding
-///    elements of the input vectors are multiplied; the first two and second
-///    two products are summed, then the two sums are added to form the final
-///    result.
+///    [8 x float] result.
+///
+///    The immediate integer operand controls which input elements will
+///    contribute to the dot product, and where the final results are returned.
+///    In general, for each dot product, the four corresponding elements of the
+///    input vectors are multiplied; the first two and second two products are
+///    summed, then the two sums are added to form the final result.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1497,15 +1498,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /* Vector shuffle */
 /// \brief Selects 8 float values from the 256-bit operands of [8 x float], as
-///    specified by the immediate value operand. The four selected elements in
-///    each operand are copied to the destination according to the bits
-///    specified in the immediate operand. The selected elements from the first
-///    256-bit operand are copied to bits [63:0] and bits [191:128] of the
-///    destination, and the selected elements from the second 256-bit operand
-///    are copied to bits [127:64] and bits [255:192] of the destination. For
-///    example, if bits [7:0] of the immediate operand contain a value of 0xFF,
-///    the 256-bit destination vector would contain the following values: b[7],
-///    b[7], a[7], a[7], b[3], b[3], a[3], a[3].
+///    specified by the immediate value operand.
+///
+///    The four selected elements in each operand are copied to the destination
+///    according to the bits specified in the immediate operand. The selected
+///    elements from the first 256-bit operand are copied to bits [63:0] and
+///    bits [191:128] of the destination, and the selected elements from the
+///    second 256-bit operand are copied to bits [127:64] and bits [255:192] of
+///    the destination. For example, if bits [7:0] of the immediate operand
+///    contain a value of 0xFF, the 256-bit destination vector would contain the
+///    following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1557,13 +1559,14 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
                                   12 + (((mask) >> 6) & 0x3)); })
 
 /// \brief Selects four double-precision values from the 256-bit operands of
-///    [4 x double], as specified by the immediate value operand. The selected
-///    elements from the first 256-bit operand are copied to bits [63:0] and
-///    bits [191:128] in the destination, and the selected elements from the
-///    second 256-bit operand are copied to bits [127:64] and bits [255:192] in
-///    the destination. For example, if bits [3:0] of the immediate operand
-///    contain a value of 0xF, the 256-bit destination vector would contain the
-///    following values: b[3], a[3], b[1], a[1].
+///    [4 x double], as specified by the immediate value operand.
+///
+///    The selected elements from the first 256-bit operand are copied to bits
+///    [63:0] and bits [191:128] in the destination, and the selected elements
+///    from the second 256-bit operand are copied to bits [127:64] and bits
+///    [255:192] in the destination. For example, if bits [3:0] of the immediate
+///    operand contain a value of 0xF, the 256-bit destination vector would
+///    contain the following values: b[3], a[3], b[1], a[1].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1641,9 +1644,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /// \brief Compares each of the corresponding double-precision values of two
 ///    128-bit vectors of [2 x double], using the operation specified by the
-///    immediate integer operand. Returns a [2 x double] vector consisting of
-///    two doubles corresponding to the two comparison results: zero if the
-///    comparison is false, and all 1's if the comparison is true.
+///    immediate integer operand.
+///
+///    Returns a [2 x double] vector consisting of two doubles corresponding to
+///    the two comparison results: zero if the comparison is false, and all 1's
+///    if the comparison is true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1699,9 +1704,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /// \brief Compares each of the corresponding values of two 128-bit vectors of
 ///    [4 x float], using the operation specified by the immediate integer
-///    operand. Returns a [4 x float] vector consisting of four floats
-///    corresponding to the four comparison results: zero if the comparison is
-///    false, and all 1's if the comparison is true.
+///    operand.
+///
+///    Returns a [4 x float] vector consisting of four floats corresponding to
+///    the four comparison results: zero if the comparison is false, and all 1's
+///    if the comparison is true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1757,9 +1764,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /// \brief Compares each of the corresponding double-precision values of two
 ///    256-bit vectors of [4 x double], using the operation specified by the
-///    immediate integer operand. Returns a [4 x double] vector consisting of
-///    four doubles corresponding to the four comparison results: zero if the
-///    comparison is false, and all 1's if the comparison is true.
+///    immediate integer operand.
+///
+///    Returns a [4 x double] vector consisting of four doubles corresponding to
+///    the four comparison results: zero if the comparison is false, and all 1's
+///    if the comparison is true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1815,9 +1824,11 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /// \brief Compares each of the corresponding values of two 256-bit vectors of
 ///    [8 x float], using the operation specified by the immediate integer
-///    operand. Returns a [8 x float] vector consisting of eight floats
-///    corresponding to the eight comparison results: zero if the comparison is
-///    false, and all 1's if the comparison is true.
+///    operand.
+///
+///    Returns an [8 x float] vector consisting of eight floats corresponding to
+///    the eight comparison results: zero if the comparison is false, and all
+///    1's if the comparison is true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1873,8 +1884,10 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /// \brief Compares each of the corresponding scalar double-precision values of
 ///    two 128-bit vectors of [2 x double], using the operation specified by the
-///    immediate integer operand. If the result is true, all 64 bits of the
-///    destination vector are set; otherwise they are cleared.
+///    immediate integer operand.
+///
+///    If the result is true, all 64 bits of the destination vector are set;
+///    otherwise they are cleared.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1930,8 +1943,10 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 
 /// \brief Compares each of the corresponding scalar values of two 128-bit
 ///    vectors of [4 x float], using the operation specified by the immediate
-///    integer operand. If the result is true, all 32 bits of the destination
-///    vector are set; otherwise they are cleared.
+///    integer operand.
+///
+///    If the result is true, all 32 bits of the destination vector are set;
+///    otherwise they are cleared.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2536,7 +2551,9 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b)
 /// \brief Given two 128-bit floating-point vectors of [2 x double], perform an
 ///    element-by-element comparison of the double-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of double-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2563,7 +2580,9 @@ _mm_testz_pd(__m128d __a, __m128d __b)
 /// \brief Given two 128-bit floating-point vectors of [2 x double], perform an
 ///    element-by-element comparison of the double-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of double-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2590,7 +2609,9 @@ _mm_testc_pd(__m128d __a, __m128d __b)
 /// \brief Given two 128-bit floating-point vectors of [2 x double], perform an
 ///    element-by-element comparison of the double-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of double-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2618,7 +2639,9 @@ _mm_testnzc_pd(__m128d __a, __m128d __b)
 /// \brief Given two 128-bit floating-point vectors of [4 x float], perform an
 ///    element-by-element comparison of the single-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of single-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2645,7 +2668,9 @@ _mm_testz_ps(__m128 __a, __m128 __b)
 /// \brief Given two 128-bit floating-point vectors of [4 x float], perform an
 ///    element-by-element comparison of the single-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of single-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2672,7 +2697,9 @@ _mm_testc_ps(__m128 __a, __m128 __b)
 /// \brief Given two 128-bit floating-point vectors of [4 x float], perform an
 ///    element-by-element comparison of the single-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of single-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2700,7 +2727,9 @@ _mm_testnzc_ps(__m128 __a, __m128 __b)
 /// \brief Given two 256-bit floating-point vectors of [4 x double], perform an
 ///    element-by-element comparison of the double-precision elements in the
 ///    first source vector and the corresponding elements in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of double-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2727,7 +2756,9 @@ _mm256_testz_pd(__m256d __a, __m256d __b)
 /// \brief Given two 256-bit floating-point vectors of [4 x double], perform an
 ///    element-by-element comparison of the double-precision elements in the
 ///    first source vector and the corresponding elements in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of double-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2754,7 +2785,9 @@ _mm256_testc_pd(__m256d __a, __m256d __b)
 /// \brief Given two 256-bit floating-point vectors of [4 x double], perform an
 ///    element-by-element comparison of the double-precision elements in the
 ///    first source vector and the corresponding elements in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of double-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2782,7 +2815,9 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b)
 /// \brief Given two 256-bit floating-point vectors of [8 x float], perform an
 ///    element-by-element comparison of the single-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of single-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2809,7 +2844,9 @@ _mm256_testz_ps(__m256 __a, __m256 __b)
 /// \brief Given two 256-bit floating-point vectors of [8 x float], perform an
 ///    element-by-element comparison of the single-precision element in the
 ///    first source vector and the corresponding element in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of single-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2836,7 +2873,9 @@ _mm256_testc_ps(__m256 __a, __m256 __b)
 /// \brief Given two 256-bit floating-point vectors of [8 x float], perform an
 ///    element-by-element comparison of the single-precision elements in the
 ///    first source vector and the corresponding elements in the second source
-///    vector. The EFLAGS register is updated as follows: \n
+///    vector.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of single-precision elements where the
 ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
 ///    ZF flag is set to 1. \n
@@ -2862,7 +2901,9 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b)
 }
 
 /// \brief Given two 256-bit integer vectors, perform a bit-by-bit comparison
-///    of the two source vectors and update the EFLAGS register as follows: \n
+///    of the two source vectors.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of bits where both bits are 1, the ZF flag
 ///    is set to 0. Otherwise the ZF flag is set to 1. \n
 ///    If there is at least one pair of bits where the bit from the first source
@@ -2886,7 +2927,9 @@ _mm256_testz_si256(__m256i __a, __m256i __b)
 }
 
 /// \brief Given two 256-bit integer vectors, perform a bit-by-bit comparison
-///    of the two source vectors and update the EFLAGS register as follows: \n
+///    of the two source vectors.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of bits where both bits are 1, the ZF flag
 ///    is set to 0. Otherwise the ZF flag is set to 1. \n
 ///    If there is at least one pair of bits where the bit from the first source
@@ -2910,7 +2953,9 @@ _mm256_testc_si256(__m256i __a, __m256i __b)
 }
 
 /// \brief Given two 256-bit integer vectors, perform a bit-by-bit comparison
-///    of the two source vectors and update the EFLAGS register as follows: \n
+///    of the two source vectors.
+///
+///    The EFLAGS register is updated as follows: \n
 ///    If there is at least one pair of bits where both bits are 1, the ZF flag
 ///    is set to 0. Otherwise the ZF flag is set to 1. \n
 ///    If there is at least one pair of bits where the bit from the first source
@@ -4466,9 +4511,10 @@ _mm256_castsi256_si128(__m256i __a)
 }
 
 /// \brief Constructs a 256-bit floating-point vector of [4 x double] from a
-///    128-bit floating-point vector of [2 x double]. The lower 128 bits
-///    contain the value of the source vector. The contents of the upper 128
-///    bits are undefined.
+///    128-bit floating-point vector of [2 x double].
+///
+///    The lower 128 bits contain the value of the source vector. The contents
+///    of the upper 128 bits are undefined.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -4486,9 +4532,10 @@ _mm256_castpd128_pd256(__m128d __a)
 }
 
 /// \brief Constructs a 256-bit floating-point vector of [8 x float] from a
-///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
-///    the value of the source vector. The contents of the upper 128 bits are
-///    undefined.
+///    128-bit floating-point vector of [4 x float].
+///
+///    The lower 128 bits contain the value of the source vector. The contents
+///    of the upper 128 bits are undefined.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -4506,6 +4553,7 @@ _mm256_castps128_ps256(__m128 __a)
 }
 
 /// \brief Constructs a 256-bit integer vector from a 128-bit integer vector.
+///
 ///    The lower 128 bits contain the value of the source vector. The contents
 ///    of the upper 128 bits are undefined.
 ///
@@ -4586,8 +4634,10 @@ _mm256_zextsi128_si256(__m128i __a)
 /// \brief Constructs a new 256-bit vector of [8 x float] by first duplicating
 ///    a 256-bit vector of [8 x float] given in the first parameter, and then
 ///    replacing either the upper or the lower 128 bits with the contents of a
-///    128-bit vector of [4 x float] in the second parameter. The immediate
-///    integer parameter determines between the upper or the lower 128 bits.
+///    128-bit vector of [4 x float] in the second parameter.
+///
+///    The immediate integer parameter selects whether the upper or the lower
+///    128 bits are replaced.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -4631,8 +4681,10 @@ _mm256_zextsi128_si256(__m128i __a)
 /// \brief Constructs a new 256-bit vector of [4 x double] by first duplicating
 ///    a 256-bit vector of [4 x double] given in the first parameter, and then
 ///    replacing either the upper or the lower 128 bits with the contents of a
-///    128-bit vector of [2 x double] in the second parameter. The immediate
-///    integer parameter determines between the upper or the lower 128 bits.
+///    128-bit vector of [2 x double] in the second parameter.
+///
+///    The immediate integer parameter selects whether the upper or the lower
+///    128 bits are replaced.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -4672,8 +4724,10 @@ _mm256_zextsi128_si256(__m128i __a)
 /// \brief Constructs a new 256-bit integer vector by first duplicating a
 ///    256-bit integer vector given in the first parameter, and then replacing
 ///    either the upper or the lower 128 bits with the contents of a 128-bit
-///    integer vector in the second parameter. The immediate integer parameter
-///    determines between the upper or the lower 128 bits.
+///    integer vector in the second parameter.
+///
+///    The immediate integer parameter selects whether the upper or the lower
+///    128 bits are replaced.
 ///
 /// \headerfile <x86intrin.h>
 ///
diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h
index 13b0db22ec44..fa5cefadc52c 100644
--- a/lib/Headers/emmintrin.h
+++ b/lib/Headers/emmintrin.h
@@ -462,8 +462,9 @@ _mm_cmplt_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are less than or equal to those in the second operand. Each
-///    comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are less than or equal to those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -482,8 +483,9 @@ _mm_cmple_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are greater than those in the second operand. Each comparison
-///    yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are greater than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -502,8 +504,9 @@ _mm_cmpgt_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are greater than or equal to those in the second operand. Each
-///    comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are greater than or equal to those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -522,9 +525,10 @@ _mm_cmpge_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are ordered with respect to those in the second operand. A pair
-///    of double-precision values are "ordered" with respect to each other if
-///    neither value is a NaN. Each comparison yields 0h for false,
+///    operand are ordered with respect to those in the second operand.
+///
+///    A pair of double-precision values are "ordered" with respect to each
+///    other if neither value is a NaN. Each comparison yields 0h for false,
 ///    FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
@@ -544,9 +548,10 @@ _mm_cmpord_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are unordered with respect to those in the second operand. A pair
-///    of double-precision values are "unordered" with respect to each other if
-///    one or both values are NaN. Each comparison yields 0h for false,
+///    operand are unordered with respect to those in the second operand.
+///
+///    A pair of double-precision values are "unordered" with respect to each
+///    other if one or both values are NaN. Each comparison yields 0h for false,
 ///    FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
@@ -567,8 +572,9 @@ _mm_cmpunord_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are unequal to those in the second operand. Each comparison
-///    yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are unequal to those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -587,8 +593,9 @@ _mm_cmpneq_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are not less than those in the second operand. Each comparison
-///    yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are not less than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -607,8 +614,9 @@ _mm_cmpnlt_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are not less than or equal to those in the second operand. Each
-///    comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are not less than or equal to those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -627,8 +635,9 @@ _mm_cmpnle_pd(__m128d __a, __m128d __b)
 
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
-///    operand are not greater than those in the second operand. Each
-///    comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    operand are not greater than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -648,6 +657,7 @@ _mm_cmpngt_pd(__m128d __a, __m128d __b)
 /// \brief Compares each of the corresponding double-precision values of the
 ///    128-bit vectors of [2 x double] to determine if the values in the first
 ///    operand are not greater than or equal to those in the second operand.
+///
 ///    Each comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
@@ -666,8 +676,9 @@ _mm_cmpnge_pd(__m128d __a, __m128d __b)
 }
 
 /// \brief Compares the lower double-precision floating-point values in each of
-///    the two 128-bit floating-point vectors of [2 x double] for equality. The
-///    comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
+///    the two 128-bit floating-point vectors of [2 x double] for equality.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -690,8 +701,9 @@ _mm_cmpeq_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than the corresponding value in
-///    the second parameter. The comparison yields 0h for false,
-///    FFFFFFFFFFFFFFFFh for true.
+///    the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -714,8 +726,9 @@ _mm_cmplt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0h for
-///    false, FFFFFFFFFFFFFFFFh for true.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -738,8 +751,9 @@ _mm_cmple_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is greater than the corresponding value
-///    in the second parameter. The comparison yields 0h for false,
-///    FFFFFFFFFFFFFFFFh for true.
+///    in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -763,8 +777,9 @@ _mm_cmpgt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is greater than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0h for
-///    false, FFFFFFFFFFFFFFFFh for true.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -788,9 +803,11 @@ _mm_cmpge_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is "ordered" with respect to the
-///    corresponding value in the second parameter. The comparison yields 0h for
-///    false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values are
-///    "ordered" with respect to each other if neither value is a NaN.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. A pair of
+///    double-precision values are "ordered" with respect to each other if
+///    neither value is a NaN.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -813,9 +830,11 @@ _mm_cmpord_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is "unordered" with respect to the
-///    corresponding value in the second parameter. The comparison yields 0h
-///    for false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values
-///    are "unordered" with respect to each other if one or both values are NaN.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true. A pair of
+///    double-precision values are "unordered" with respect to each other if one
+///    or both values are NaN.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -839,8 +858,9 @@ _mm_cmpunord_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is unequal to the corresponding value in
-///    the second parameter. The comparison yields 0h for false,
-///    FFFFFFFFFFFFFFFFh for true.
+///    the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -863,8 +883,9 @@ _mm_cmpneq_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is not less than the corresponding
-///    value in the second parameter. The comparison yields 0h for false,
-///    FFFFFFFFFFFFFFFFh for true.
+///    value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -887,8 +908,9 @@ _mm_cmpnlt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is not less than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0h
-///    for false, FFFFFFFFFFFFFFFFh for true.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -911,8 +933,9 @@ _mm_cmpnle_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is not greater than the corresponding
-///    value in the second parameter. The comparison yields 0h for false,
-///    FFFFFFFFFFFFFFFFh for true.
+///    value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -936,8 +959,9 @@ _mm_cmpngt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is not greater than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0h
-///    for false, FFFFFFFFFFFFFFFFh for true.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0h for false, FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -982,7 +1006,9 @@ _mm_comieq_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than the corresponding value in
-///    the second parameter. The comparison yields 0 for false, 1 for true.
+///    the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1004,8 +1030,9 @@ _mm_comilt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0 for
-///    false, 1 for true.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1027,7 +1054,9 @@ _mm_comile_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is greater than the corresponding value
-///    in the second parameter. The comparison yields 0 for false, 1 for true.
+///    in the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1049,8 +1078,9 @@ _mm_comigt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is greater than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0 for
-///    false, 1 for true.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1072,7 +1102,9 @@ _mm_comige_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is unequal to the corresponding value in
-///    the second parameter. The comparison yields 0 for false, 1 for true.
+///    the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1093,8 +1125,9 @@ _mm_comineq_sd(__m128d __a, __m128d __b)
 
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] for equality. The
-///    comparison yields 0 for false, 1 for true. If either of the two lower
-///    double-precision values is NaN, 1 is returned.
+///    comparison yields 0 for false, 1 for true.
+///
+///    If either of the two lower double-precision values is NaN, 1 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1117,8 +1150,10 @@ _mm_ucomieq_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than the corresponding value in
-///    the second parameter. The comparison yields 0 for false, 1 for true. If
-///    either of the two lower double-precision values is NaN, 1 is returned.
+///    the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true. If either of the two lower
+///    double-precision values is NaN, 1 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1141,9 +1176,10 @@ _mm_ucomilt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0 for
-///    false, 1 for true. If either of the two lower double-precision values is
-///    NaN, 1 is returned.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true. If either of the two lower
+///    double-precision values is NaN, 1 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1166,8 +1202,10 @@ _mm_ucomile_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is greater than the corresponding value
-///    in the second parameter. The comparison yields 0 for false, 1 for true.
-///    If either of the two lower double-precision values is NaN, 0 is returned.
+///    in the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true. If either of the two lower
+///    double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1190,9 +1228,10 @@ _mm_ucomigt_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is greater than or equal to the
-///    corresponding value in the second parameter. The comparison yields 0 for
-///    false, 1 for true.  If either of the two lower double-precision values
-///    is NaN, 0 is returned.
+///    corresponding value in the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true. If either of the two lower
+///    double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1215,8 +1254,10 @@ _mm_ucomige_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is unequal to the corresponding value in
-///    the second parameter. The comparison yields 0 for false, 1 for true. If
-///    either of the two lower double-precision values is NaN, 0 is returned.
+///    the second parameter.
+///
+///    The comparison yields 0 for false, 1 for true. If either of the two lower
+///    double-precision values is NaN, 0 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1278,8 +1319,9 @@ _mm_cvtps_pd(__m128 __a)
 
 /// \brief Converts the lower two integer elements of a 128-bit vector of
 ///    [4 x i32] into two double-precision floating-point values, returned in a
-///    128-bit vector of [2 x double]. The upper two elements of the input
-///    vector are unused.
+///    128-bit vector of [2 x double].
+///
+///    The upper two elements of the input vector are unused.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1287,7 +1329,9 @@ _mm_cvtps_pd(__m128 __a)
 ///
 /// \param __a
 ///    A 128-bit integer vector of [4 x i32]. The lower two integer elements are
-///    converted to double-precision values. The upper two elements are unused.
+///    converted to double-precision values.
+///
+///    The upper two elements are unused.
 /// \returns A 128-bit vector of [2 x double] containing the converted values.
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtepi32_pd(__m128i __a)
@@ -1409,10 +1453,11 @@ _mm_cvtss_sd(__m128d __a, __m128 __b)
 
 /// \brief Converts the two double-precision floating-point elements of a
 ///    128-bit vector of [2 x double] into two signed 32-bit integer values,
-///    returned in the lower 64 bits of a 128-bit vector of [4 x i32]. If the
-///    result of either conversion is inexact, the result is truncated (rounded
-///    towards zero) regardless of the current MXCSR setting. The upper 64 bits
-///    of the result vector are set to zero.
+///    returned in the lower 64 bits of a 128-bit vector of [4 x i32].
+///
+///    If the result of either conversion is inexact, the result is truncated
+///    (rounded towards zero) regardless of the current MXCSR setting. The upper
+///    64 bits of the result vector are set to zero.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1466,9 +1511,10 @@ _mm_cvtpd_pi32(__m128d __a)
 
 /// \brief Converts the two double-precision floating-point elements of a
 ///    128-bit vector of [2 x double] into two signed 32-bit integer values,
-///    returned in a 64-bit vector of [2 x i32]. If the result of either
-///    conversion is inexact, the result is truncated (rounded towards zero)
-///    regardless of the current MXCSR setting.
+///    returned in a 64-bit vector of [2 x i32].
+///
+///    If the result of either conversion is inexact, the result is truncated
+///    (rounded towards zero) regardless of the current MXCSR setting.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1980,8 +2026,9 @@ _mm_storel_pd(double *__dp, __m128d __a)
 
 /// \brief Adds the corresponding elements of two 128-bit vectors of [16 x i8],
 ///    saving the lower 8 bits of each sum in the corresponding element of a
-///    128-bit result vector of [16 x i8]. The integer elements of both
-///    parameters can be either signed or unsigned.
+///    128-bit result vector of [16 x i8].
+///
+///    The integer elements of both parameters can be either signed or unsigned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2001,8 +2048,9 @@ _mm_add_epi8(__m128i __a, __m128i __b)
 
 /// \brief Adds the corresponding elements of two 128-bit vectors of [8 x i16],
 ///    saving the lower 16 bits of each sum in the corresponding element of a
-///    128-bit result vector of [8 x i16]. The integer elements of both
-///    parameters can be either signed or unsigned.
+///    128-bit result vector of [8 x i16].
+///
+///    The integer elements of both parameters can be either signed or unsigned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2022,8 +2070,9 @@ _mm_add_epi16(__m128i __a, __m128i __b)
 
 /// \brief Adds the corresponding elements of two 128-bit vectors of [4 x i32],
 ///    saving the lower 32 bits of each sum in the corresponding element of a
-///    128-bit result vector of [4 x i32]. The integer elements of both
-///    parameters can be either signed or unsigned.
+///    128-bit result vector of [4 x i32].
+///
+///    The integer elements of both parameters can be either signed or unsigned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2061,8 +2110,9 @@ _mm_add_si64(__m64 __a, __m64 __b)
 
 /// \brief Adds the corresponding elements of two 128-bit vectors of [2 x i64],
 ///    saving the lower 64 bits of each sum in the corresponding element of a
-///    128-bit result vector of [2 x i64]. The integer elements of both
-///    parameters can be either signed or unsigned.
+///    128-bit result vector of [2 x i64].
+///
+///    The integer elements of both parameters can be either signed or unsigned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2208,10 +2258,12 @@ _mm_avg_epu16(__m128i __a, __m128i __b)
 /// \brief Multiplies the corresponding elements of two 128-bit signed [8 x i16]
 ///    vectors, producing eight intermediate 32-bit signed integer products, and
 ///    adds the consecutive pairs of 32-bit products to form a 128-bit signed
-///    [4 x i32] vector. For example, bits [15:0] of both parameters are
-///    multiplied producing a 32-bit product, bits [31:16] of both parameters
-///    are multiplied producing a 32-bit product, and the sum of those two
-///    products becomes bits [31:0] of the result.
+///    [4 x i32] vector.
+///
+///    For example, bits [15:0] of both parameters are multiplied producing a
+///    32-bit product, bits [31:16] of both parameters are multiplied producing
+///    a 32-bit product, and the sum of those two products becomes bits [31:0]
+///    of the result.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3146,8 +3198,9 @@ _mm_cmpgt_epi8(__m128i __a, __m128i __b)
 
 /// \brief Compares each of the corresponding signed 16-bit values of the
 ///    128-bit integer vectors to determine if the values in the first operand
-///    are greater than those in the second operand. Each comparison yields 0h
-///    for false, FFFFh for true.
+///    are greater than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3166,8 +3219,9 @@ _mm_cmpgt_epi16(__m128i __a, __m128i __b)
 
 /// \brief Compares each of the corresponding signed 32-bit values of the
 ///    128-bit integer vectors to determine if the values in the first operand
-///    are greater than those in the second operand. Each comparison yields 0h
-///    for false, FFFFFFFFh for true.
+///    are greater than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3186,8 +3240,9 @@ _mm_cmpgt_epi32(__m128i __a, __m128i __b)
 
 /// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
 ///    integer vectors to determine if the values in the first operand are less
-///    than those in the second operand. Each comparison yields 0h for false,
-///    FFh for true.
+///    than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3206,8 +3261,9 @@ _mm_cmplt_epi8(__m128i __a, __m128i __b)
 
 /// \brief Compares each of the corresponding signed 16-bit values of the
 ///    128-bit integer vectors to determine if the values in the first operand
-///    are less than those in the second operand. Each comparison yields 0h for
-///    false, FFFFh for true.
+///    are less than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3226,8 +3282,9 @@ _mm_cmplt_epi16(__m128i __a, __m128i __b)
 
 /// \brief Compares each of the corresponding signed 32-bit values of the
 ///    128-bit integer vectors to determine if the values in the first operand
-///    are less than those in the second operand. Each comparison yields 0h for
-///    false, FFFFFFFFh for true.
+///    are less than those in the second operand.
+///
+///    Each comparison yields 0h for false, FFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3925,10 +3982,11 @@ _mm_storeu_si128(__m128i *__p, __m128i __b)
 
 /// \brief Moves bytes selected by the mask from the first operand to the
 ///    specified unaligned memory location. When a mask bit is 1, the
-///    corresponding byte is written, otherwise it is not written. To minimize
-///    caching, the date is flagged as non-temporal (unlikely to be used again
-///    soon). Exception and trap behavior for elements not selected for storage
-///    to memory are implementation dependent.
+///    corresponding byte is written, otherwise it is not written.
+///
+///    To minimize caching, the data is flagged as non-temporal (unlikely to be
+///    used again soon). Exception and trap behavior for elements not selected
+///    for storage to memory is implementation dependent.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3972,8 +4030,10 @@ _mm_storel_epi64(__m128i *__p, __m128i __a)
 }
 
 /// \brief Stores a 128-bit floating point vector of [2 x double] to a 128-bit
-///    aligned memory location. To minimize caching, the data is flagged as
-///    non-temporal (unlikely to be used again soon).
+///    aligned memory location.
+///
+///    To minimize caching, the data is flagged as non-temporal (unlikely to be
+///    used again soon).
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -3990,6 +4050,7 @@ _mm_stream_pd(double *__p, __m128d __a)
 }
 
 /// \brief Stores a 128-bit integer vector to a 128-bit aligned memory location.
+///
 ///    To minimize caching, the data is flagged as non-temporal (unlikely to be
 ///    used again soon).
 ///
@@ -4007,8 +4068,9 @@ _mm_stream_si128(__m128i *__p, __m128i __a)
   __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);
 }
 
-/// \brief Stores a 32-bit integer value in the specified memory location. To
-///    minimize caching, the data is flagged as non-temporal (unlikely to be
+/// \brief Stores a 32-bit integer value in the specified memory location.
+///
+///    To minimize caching, the data is flagged as non-temporal (unlikely to be
 ///    used again soon).
 ///
 /// \headerfile <x86intrin.h>
@@ -4026,8 +4088,9 @@ _mm_stream_si32(int *__p, int __a)
 }
 
 #ifdef __x86_64__
-/// \brief Stores a 64-bit integer value in the specified memory location. To
-///    minimize caching, the data is flagged as non-temporal (unlikely to be
+/// \brief Stores a 64-bit integer value in the specified memory location.
+///
+///    To minimize caching, the data is flagged as non-temporal (unlikely to be
 ///    used again soon).
 ///
 /// \headerfile <x86intrin.h>
diff --git a/lib/Headers/intrin.h b/lib/Headers/intrin.h
index 38d9407abed9..881d05c0d164 100644
--- a/lib/Headers/intrin.h
+++ b/lib/Headers/intrin.h
@@ -85,9 +85,6 @@ void __inwordstring(unsigned short, unsigned short *, unsigned long);
 void __lidt(void *);
 unsigned __int64 __ll_lshift(unsigned __int64, int);
 __int64 __ll_rshift(__int64, int);
-void __llwpcb(void *);
-unsigned char __lwpins32(unsigned int, unsigned int, unsigned int);
-void __lwpval32(unsigned int, unsigned int, unsigned int);
 unsigned int __lzcnt(unsigned int);
 unsigned short __lzcnt16(unsigned short);
 static __inline__
@@ -126,7 +123,6 @@ unsigned __int64 __readmsr(unsigned long);
 unsigned __int64 __readpmc(unsigned long);
 unsigned long __segmentlimit(unsigned long);
 void __sidt(void *);
-void *__slwpcb(void);
 static __inline__
 void __stosb(unsigned char *, unsigned char, size_t);
 static __inline__
@@ -227,8 +223,6 @@ void __incgsbyte(unsigned long);
 void __incgsdword(unsigned long);
 void __incgsqword(unsigned long);
 void __incgsword(unsigned long);
-unsigned char __lwpins64(unsigned __int64, unsigned int, unsigned int);
-void __lwpval64(unsigned __int64, unsigned int, unsigned int);
 unsigned __int64 __lzcnt64(unsigned __int64);
 static __inline__
 void __movsq(unsigned long long *, unsigned long long const *, size_t);
diff --git a/lib/Headers/mmintrin.h b/lib/Headers/mmintrin.h
index 2b3618398cbf..5a7968bec842 100644
--- a/lib/Headers/mmintrin.h
+++ b/lib/Headers/mmintrin.h
@@ -608,10 +608,11 @@ _mm_subs_pi16(__m64 __m1, __m64 __m2)
 
 /// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit
 ///    integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
-///    element of the first 64-bit integer vector of [8 x i8]. If an element of
-///    the first vector is less than the corresponding element of the second
-///    vector, the result is saturated to 0. The results are packed into a
-///    64-bit integer vector of [8 x i8].
+///    element of the first 64-bit integer vector of [8 x i8].
+///
+///    If an element of the first vector is less than the corresponding element
+///    of the second vector, the result is saturated to 0. The results are
+///    packed into a 64-bit integer vector of [8 x i8].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -631,10 +632,11 @@ _mm_subs_pu8(__m64 __m1, __m64 __m2)
 
 /// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit
 ///    integer vector of [4 x i16] from the corresponding 16-bit unsigned
-///    integer element of the first 64-bit integer vector of [4 x i16]. If an
-///    element of the first vector is less than the corresponding element of the
-///    second vector, the result is saturated to 0. The results are packed into
-///    a 64-bit integer vector of [4 x i16].
+///    integer element of the first 64-bit integer vector of [4 x i16].
+///
+///    If an element of the first vector is less than the corresponding element
+///    of the second vector, the result is saturated to 0. The results are
+///    packed into a 64-bit integer vector of [4 x i16].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -657,9 +659,11 @@ _mm_subs_pu16(__m64 __m1, __m64 __m2)
 ///    element of the second 64-bit integer vector of [4 x i16] and get four
 ///    32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
 ///    The lower 32 bits of these two sums are packed into a 64-bit integer
-///    vector of [2 x i32]. For example, bits [15:0] of both parameters are
-///    multiplied, bits [31:16] of both parameters are multiplied, and the sum
-///    of both results is written to bits [31:0] of the result.
+///    vector of [2 x i32].
+///
+///    For example, bits [15:0] of both parameters are multiplied, bits [31:16]
+///    of both parameters are multiplied, and the sum of both results is written
+///    to bits [31:0] of the result.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -851,10 +855,11 @@ _mm_slli_si64(__m64 __m, int __count)
 
 /// \brief Right-shifts each 16-bit integer element of the first parameter,
 ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
-///    specified by the second parameter, which is a 64-bit integer. High-order
-///    bits are filled with the sign bit of the initial value of each 16-bit
-///    element. The 16-bit results are packed into a 64-bit integer vector of
-///    [4 x i16].
+///    specified by the second parameter, which is a 64-bit integer.
+///
+///    High-order bits are filled with the sign bit of the initial value of each
+///    16-bit element. The 16-bit results are packed into a 64-bit integer
+///    vector of [4 x i16].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -874,6 +879,7 @@ _mm_sra_pi16(__m64 __m, __m64 __count)
 
 /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
 ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
+///
 ///    High-order bits are filled with the sign bit of the initial value of each
 ///    16-bit element. The 16-bit results are packed into a 64-bit integer
 ///    vector of [4 x i16].
@@ -896,10 +902,11 @@ _mm_srai_pi16(__m64 __m, int __count)
 
 /// \brief Right-shifts each 32-bit integer element of the first parameter,
 ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
-///    specified by the second parameter, which is a 64-bit integer. High-order
-///    bits are filled with the sign bit of the initial value of each 32-bit
-///    element. The 32-bit results are packed into a 64-bit integer vector of
-///    [2 x i32].
+///    specified by the second parameter, which is a 64-bit integer.
+///
+///    High-order bits are filled with the sign bit of the initial value of each
+///    32-bit element. The 32-bit results are packed into a 64-bit integer
+///    vector of [2 x i32].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -919,6 +926,7 @@ _mm_sra_pi32(__m64 __m, __m64 __count)
 
 /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
 ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
+///
 ///    High-order bits are filled with the sign bit of the initial value of each
 ///    32-bit element. The 32-bit results are packed into a 64-bit integer
 ///    vector of [2 x i32].
@@ -941,9 +949,10 @@ _mm_srai_pi32(__m64 __m, int __count)
 
 /// \brief Right-shifts each 16-bit integer element of the first parameter,
 ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
-///    specified by the second parameter, which is a 64-bit integer. High-order
-///    bits are cleared. The 16-bit results are packed into a 64-bit integer
-///    vector of [4 x i16].
+///    specified by the second parameter, which is a 64-bit integer.
+///
+///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
+///    integer vector of [4 x i16].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -963,6 +972,7 @@ _mm_srl_pi16(__m64 __m, __m64 __count)
 
 /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
 ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
+///
 ///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
 ///    integer vector of [4 x i16].
 ///
@@ -984,9 +994,10 @@ _mm_srli_pi16(__m64 __m, int __count)
 
 /// \brief Right-shifts each 32-bit integer element of the first parameter,
 ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
-///    specified by the second parameter, which is a 64-bit integer. High-order
-///    bits are cleared. The 32-bit results are packed into a 64-bit integer
-///    vector of [2 x i32].
+///    specified by the second parameter, which is a 64-bit integer.
+///
+///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
+///    integer vector of [2 x i32].
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1006,6 +1017,7 @@ _mm_srl_pi32(__m64 __m, __m64 __count)
 
 /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
 ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
+///
 ///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
 ///    integer vector of [2 x i32].
 ///
@@ -1026,8 +1038,9 @@ _mm_srli_pi32(__m64 __m, int __count)
 }
 
 /// \brief Right-shifts the first 64-bit integer parameter by the number of bits
-///    specified by the second 64-bit integer parameter. High-order bits are
-///    cleared.
+///    specified by the second 64-bit integer parameter.
+///
+///    High-order bits are cleared.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1046,7 +1059,9 @@ _mm_srl_si64(__m64 __m, __m64 __count)
 
 /// \brief Right-shifts the first parameter, which is a 64-bit integer, by the
 ///    number of bits specified by the second parameter, which is a 32-bit
-///    integer. High-order bits are cleared.
+///    integer.
+///
+///    High-order bits are cleared.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1140,8 +1155,9 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
 
 /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
 ///    [8 x i8] to determine if the element of the first vector is equal to the
-///    corresponding element of the second vector. The comparison yields 0 for
-///    false, 0xFF for true.
+///    corresponding element of the second vector.
+///
+///    The comparison yields 0 for false, 0xFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1161,8 +1177,9 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
 
 /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
 ///    [4 x i16] to determine if the element of the first vector is equal to the
-///    corresponding element of the second vector. The comparison yields 0 for
-///    false, 0xFFFF for true.
+///    corresponding element of the second vector.
+///
+///    The comparison yields 0 for false, 0xFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1182,8 +1199,9 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
 
 /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
 ///    [2 x i32] to determine if the element of the first vector is equal to the
-///    corresponding element of the second vector. The comparison yields 0 for
-///    false, 0xFFFFFFFF for true.
+///    corresponding element of the second vector.
+///
+///    The comparison yields 0 for false, 0xFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1203,8 +1221,9 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
 
 /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
 ///    [8 x i8] to determine if the element of the first vector is greater than
-///    the corresponding element of the second vector. The comparison yields 0
-///    for false, 0xFF for true.
+///    the corresponding element of the second vector.
+///
+///    The comparison yields 0 for false, 0xFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1224,8 +1243,9 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
 
 /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
 ///    [4 x i16] to determine if the element of the first vector is greater than
-///    the corresponding element of the second vector. The comparison yields 0
-///    for false, 0xFFFF for true.
+///    the corresponding element of the second vector.
+///
+///    The comparison yields 0 for false, 0xFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1245,8 +1265,9 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
 
 /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
 ///    [2 x i32] to determine if the element of the first vector is greater than
-///    the corresponding element of the second vector. The comparison yields 0
-///    for false, 0xFFFFFFFF for true.
+///    the corresponding element of the second vector.
+///
+///    The comparison yields 0 for false, 0xFFFFFFFF for true.
 ///
 /// \headerfile <x86intrin.h>
 ///
diff --git a/lib/Headers/opencl-c.h b/lib/Headers/opencl-c.h
index 6452d5c987f0..58c8daf3a536 100644
--- a/lib/Headers/opencl-c.h
+++ b/lib/Headers/opencl-c.h
@@ -14962,6 +14962,7 @@ float __purefn __ovld read_imagef(read_only image2d_array_msaa_depth_t image, in
 #endif //cl_khr_gl_msaa_sharing
 
 // OpenCL Extension v2.0 s9.18 - Mipmaps
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 #ifdef cl_khr_mipmap_image
 
 float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);
@@ -15037,6 +15038,7 @@ int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, f
 uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);
 
 #endif //cl_khr_mipmap_image
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
 /**
 * Sampler-less Image Access
@@ -15135,6 +15137,7 @@ float __purefn __ovld read_imagef(read_write image2d_msaa_depth_t image, int2 co
 float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, int4 coord, int sample);
 #endif //cl_khr_gl_msaa_sharing
 
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 #ifdef cl_khr_mipmap_image
 float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);
 int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);
@@ -15208,6 +15211,7 @@ float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler
 int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);
 uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);
 #endif //cl_khr_mipmap_image
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
 // Image read functions returning half4 type
 #ifdef cl_khr_fp16
@@ -15319,6 +15323,7 @@ void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, flo
 #endif //cl_khr_depth_images
 
 // OpenCL Extension v2.0 s9.18 - Mipmaps
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 #ifdef cl_khr_mipmap_image
 void __ovld write_imagef(write_only image1d_t image, int coord, int lod, float4 color);
 void __ovld write_imagei(write_only image1d_t image, int coord, int lod, int4 color);
@@ -15345,6 +15350,7 @@ void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 c
 void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color);
 #endif
 #endif //cl_khr_mipmap_image
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
 // Image write functions for half4 type
 #ifdef cl_khr_fp16
@@ -15391,6 +15397,7 @@ void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, float col
 void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, float color);
 #endif //cl_khr_depth_images
 
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 #ifdef cl_khr_mipmap_image
 void __ovld write_imagef(read_write image1d_t image, int coord, int lod, float4 color);
 void __ovld write_imagei(read_write image1d_t image, int coord, int lod, int4 color);
@@ -15417,6 +15424,7 @@ void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 c
 void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color);
 #endif
 #endif //cl_khr_mipmap_image
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
 // Image write functions for half4 type
 #ifdef cl_khr_fp16
@@ -15559,6 +15567,7 @@ int __ovld __cnfn get_image_depth(read_write image3d_t image);
 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
 // OpenCL Extension v2.0 s9.18 - Mipmaps
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 #ifdef cl_khr_mipmap_image
 /**
  * Return the image miplevels.
@@ -15574,11 +15583,9 @@ int __ovld get_image_num_mip_levels(write_only image2d_t image);
 int __ovld get_image_num_mip_levels(write_only image3d_t image);
 #endif
 
-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 int __ovld get_image_num_mip_levels(read_write image1d_t image);
 int __ovld get_image_num_mip_levels(read_write image2d_t image);
 int __ovld get_image_num_mip_levels(read_write image3d_t image);
-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
 int __ovld get_image_num_mip_levels(read_only image1d_array_t image);
 int __ovld get_image_num_mip_levels(read_only image2d_array_t image);
@@ -15590,14 +15597,13 @@ int __ovld get_image_num_mip_levels(write_only image2d_array_t image);
 int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image);
 int __ovld get_image_num_mip_levels(write_only image2d_depth_t image);
 
-#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 int __ovld get_image_num_mip_levels(read_write image1d_array_t image);
 int __ovld get_image_num_mip_levels(read_write image2d_array_t image);
 int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image);
 int __ovld get_image_num_mip_levels(read_write image2d_depth_t image);
-#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
 #endif //cl_khr_mipmap_image
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
 /**
  * Return the channel data type. Valid values are:
diff --git a/lib/Headers/pmmintrin.h b/lib/Headers/pmmintrin.h
index a479d9ed2911..559ece2e3974 100644
--- a/lib/Headers/pmmintrin.h
+++ b/lib/Headers/pmmintrin.h
@@ -31,9 +31,11 @@
   __attribute__((__always_inline__, __nodebug__, __target__("sse3")))
 
 /// \brief Loads data from an unaligned memory location to elements in a 128-bit
-///    vector. If the address of the data is not 16-byte aligned, the
-///    instruction may read two adjacent aligned blocks of memory to retrieve
-///    the requested data.
+///    vector.
+///
+///    If the address of the data is not 16-byte aligned, the instruction may
+///    read two adjacent aligned blocks of memory to retrieve the requested
+///    data.
 ///
 /// \headerfile <x86intrin.h>
 ///
diff --git a/lib/Headers/prfchwintrin.h b/lib/Headers/prfchwintrin.h
index a3789126ef07..b52f31da2706 100644
--- a/lib/Headers/prfchwintrin.h
+++ b/lib/Headers/prfchwintrin.h
@@ -50,8 +50,10 @@ _m_prefetch(void *__P)
 ///    the L1 data cache and sets the cache-coherency to modified. This
 ///    provides a hint to the processor that the cache line will be modified.
 ///    It is intended for use when the cache line will be written to shortly
-///    after the prefetch is performed. Note that the effect of this intrinsic
-///    is dependent on the processor implementation.
+///    after the prefetch is performed.
+///
+///    Note that the effect of this intrinsic is dependent on the processor
+///    implementation.
 ///
 /// \headerfile <x86intrin.h>
 ///
diff --git a/lib/Headers/smmintrin.h b/lib/Headers/smmintrin.h
index 1c94aca69381..c2fa5a452bce 100644
--- a/lib/Headers/smmintrin.h
+++ b/lib/Headers/smmintrin.h
@@ -586,7 +586,9 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
 /* SSE4 Floating Point Dot Product Instructions.  */
 /// \brief Computes the dot product of the two 128-bit vectors of [4 x float]
 ///    and returns it in the elements of the 128-bit result vector of
-///    [4 x float]. The immediate integer operand controls which input elements
+///    [4 x float].
+///
+///    The immediate integer operand controls which input elements
 ///    will contribute to the dot product, and where the final results are
 ///    returned.
 ///
@@ -620,7 +622,9 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
 
 /// \brief Computes the dot product of the two 128-bit vectors of [2 x double]
 ///    and returns it in the elements of the 128-bit result vector of
-///    [2 x double]. The immediate integer operand controls which input
+///    [2 x double].
+///
+///    The immediate integer operand controls which input
 ///    elements will contribute to the dot product, and where the final results
 ///    are returned.
 ///
@@ -875,7 +879,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
 /// int _mm_extract_ps(__m128 X, const int N);
 /// \endcode
 ///
-/// This intrinsic corresponds to the <c> VEXTRACTPS / EXTRACTPS </c> 
+/// This intrinsic corresponds to the <c> VEXTRACTPS / EXTRACTPS </c>
 /// instruction.
 ///
 /// \param X
diff --git a/lib/Headers/tmmintrin.h b/lib/Headers/tmmintrin.h
index 80664043a06f..042bfc7e3b0d 100644
--- a/lib/Headers/tmmintrin.h
+++ b/lib/Headers/tmmintrin.h
@@ -469,10 +469,11 @@ _mm_hsubs_pi16(__m64 __a, __m64 __b)
 ///    values contained in the first source operand and packed 8-bit signed
 ///    integer values contained in the second source operand, adds pairs of
 ///    contiguous products with signed saturation, and writes the 16-bit sums to
-///    the corresponding bits in the destination. For example, bits [7:0] of
-///    both operands are multiplied, bits [15:8] of both operands are
-///    multiplied, and the sum of both results is written to bits [15:0] of the
-///    destination.
+///    the corresponding bits in the destination.
+///
+///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
+///    both operands are multiplied, and the sum of both results is written to
+///    bits [15:0] of the destination.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -502,10 +503,11 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b)
 ///    values contained in the first source operand and packed 8-bit signed
 ///    integer values contained in the second source operand, adds pairs of
 ///    contiguous products with signed saturation, and writes the 16-bit sums to
-///    the corresponding bits in the destination. For example, bits [7:0] of
-///    both operands are multiplied, bits [15:8] of both operands are
-///    multiplied, and the sum of both results is written to bits [15:0] of the
-///    destination.
+///    the corresponding bits in the destination.
+///
+///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
+///    both operands are multiplied, and the sum of both results is written to
+///    bits [15:0] of the destination.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -619,13 +621,14 @@ _mm_shuffle_pi8(__m64 __a, __m64 __b)
 }
 
 /// \brief For each 8-bit integer in the first source operand, perform one of
-///    the following actions as specified by the second source operand: If the
-///    byte in the second source is negative, calculate the two's complement of
-///    the corresponding byte in the first source, and write that value to the
-///    destination. If the byte in the second source is positive, copy the
-///    corresponding byte from the first source to the destination. If the byte
-///    in the second source is zero, clear the corresponding byte in the
-///    destination.
+///    the following actions as specified by the second source operand.
+///
+///    If the byte in the second source is negative, calculate the two's
+///    complement of the corresponding byte in the first source, and write that
+///    value to the destination. If the byte in the second source is positive,
+///    copy the corresponding byte from the first source to the destination. If
+///    the byte in the second source is zero, clear the corresponding byte in
+///    the destination.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -644,13 +647,14 @@ _mm_sign_epi8(__m128i __a, __m128i __b)
 }
 
 /// \brief For each 16-bit integer in the first source operand, perform one of
-///    the following actions as specified by the second source operand: If the
-///    word in the second source is negative, calculate the two's complement of
-///    the corresponding word in the first source, and write that value to the
-///    destination. If the word in the second source is positive, copy the
-///    corresponding word from the first source to the destination. If the word
-///    in the second source is zero, clear the corresponding word in the
-///    destination.
+///    the following actions as specified by the second source operand.
+///
+///    If the word in the second source is negative, calculate the two's
+///    complement of the corresponding word in the first source, and write that
+///    value to the destination. If the word in the second source is positive,
+///    copy the corresponding word from the first source to the destination. If
+///    the word in the second source is zero, clear the corresponding word in
+///    the destination.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -669,8 +673,9 @@ _mm_sign_epi16(__m128i __a, __m128i __b)
 }
 
 /// \brief For each 32-bit integer in the first source operand, perform one of
-///    the following actions as specified by the second source operand: If the
-///    doubleword in the second source is negative, calculate the two's
+///    the following actions as specified by the second source operand.
+///
+///    If the doubleword in the second source is negative, calculate the two's
 ///    complement of the corresponding word in the first source, and write that
 ///    value to the destination. If the doubleword in the second source is
 ///    positive, copy the corresponding word from the first source to the
@@ -694,13 +699,14 @@ _mm_sign_epi32(__m128i __a, __m128i __b)
 }
 
 /// \brief For each 8-bit integer in the first source operand, perform one of
-///    the following actions as specified by the second source operand: If the
-///    byte in the second source is negative, calculate the two's complement of
-///    the corresponding byte in the first source, and write that value to the
-///    destination. If the byte in the second source is positive, copy the
-///    corresponding byte from the first source to the destination. If the byte
-///    in the second source is zero, clear the corresponding byte in the
-///    destination.
+///    the following actions as specified by the second source operand.
+///
+///    If the byte in the second source is negative, calculate the two's
+///    complement of the corresponding byte in the first source, and write that
+///    value to the destination. If the byte in the second source is positive,
+///    copy the corresponding byte from the first source to the destination. If
+///    the byte in the second source is zero, clear the corresponding byte in
+///    the destination.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -719,13 +725,14 @@ _mm_sign_pi8(__m64 __a, __m64 __b)
 }
 
 /// \brief For each 16-bit integer in the first source operand, perform one of
-///    the following actions as specified by the second source operand: If the
-///    word in the second source is negative, calculate the two's complement of
-///    the corresponding word in the first source, and write that value to the
-///    destination. If the word in the second source is positive, copy the
-///    corresponding word from the first source to the destination. If the word
-///    in the second source is zero, clear the corresponding word in the
-///    destination.
+///    the following actions as specified by the second source operand.
+///
+///    If the word in the second source is negative, calculate the two's
+///    complement of the corresponding word in the first source, and write that
+///    value to the destination. If the word in the second source is positive,
+///    copy the corresponding word from the first source to the destination. If
+///    the word in the second source is zero, clear the corresponding word in
+///    the destination.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -744,8 +751,9 @@ _mm_sign_pi16(__m64 __a, __m64 __b)
 }
 
 /// \brief For each 32-bit integer in the first source operand, perform one of
-///    the following actions as specified by the second source operand: If the
-///    doubleword in the second source is negative, calculate the two's
+///    the following actions as specified by the second source operand.
+///
+///    If the doubleword in the second source is negative, calculate the two's
 ///    complement of the corresponding doubleword in the first source, and
 ///    write that value to the destination. If the doubleword in the second
 ///    source is positive, copy the corresponding doubleword from the first
diff --git a/lib/Headers/x86intrin.h b/lib/Headers/x86intrin.h
index ef1d02948c8b..31ee7b82dd53 100644
--- a/lib/Headers/x86intrin.h
+++ b/lib/Headers/x86intrin.h
@@ -88,6 +88,4 @@
 #include <clzerointrin.h>
 #endif
 
-/* FIXME: LWP */
-
 #endif /* __X86INTRIN_H */
diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h
index 5c312c08efb6..9773acb840a5 100644
--- a/lib/Headers/xmmintrin.h
+++ b/lib/Headers/xmmintrin.h
@@ -2331,8 +2331,10 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
 /// \brief Conditionally copies the values from each 8-bit element in the first
 ///    64-bit integer vector operand to the specified memory location, as
 ///    specified by the most significant bit in the corresponding element in the
-///    second 64-bit integer vector operand. To minimize caching, the data is
-///    flagged as non-temporal (unlikely to be used again soon).
+///    second 64-bit integer vector operand.
+///
+///    To minimize caching, the data is flagged as non-temporal
+///    (unlikely to be used again soon).
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2815,11 +2817,12 @@ _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
 
 /// \brief Converts each single-precision floating-point element of a 128-bit
 ///    floating-point vector of [4 x float] into a 16-bit signed integer, and
-///    packs the results into a 64-bit integer vector of [4 x i16]. If the
-///    floating-point element is NaN or infinity, or if the floating-point
-///    element is greater than 0x7FFFFFFF or less than -0x8000, it is converted
-///    to 0x8000. Otherwise if the floating-point element is greater than
-///    0x7FFF, it is converted to 0x7FFF.
+///    packs the results into a 64-bit integer vector of [4 x i16].
+///
+///    If the floating-point element is NaN or infinity, or if the
+///    floating-point element is greater than 0x7FFFFFFF or less than -0x8000,
+///    it is converted to 0x8000. Otherwise, if the floating-point element is
+///    greater than 0x7FFF, it is converted to 0x7FFF.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -2845,11 +2848,12 @@ _mm_cvtps_pi16(__m128 __a)
 /// \brief Converts each single-precision floating-point element of a 128-bit
 ///    floating-point vector of [4 x float] into an 8-bit signed integer, and
 ///    packs the results into the lower 32 bits of a 64-bit integer vector of
-///    [8 x i8]. The upper 32 bits of the vector are set to 0. If the
-///    floating-point element is NaN or infinity, or if the floating-point
-///    element is greater than 0x7FFFFFFF or less than -0x80, it is converted
-///    to 0x80. Otherwise if the floating-point element is greater than 0x7F,
-///    it is converted to 0x7F.
+///    [8 x i8]. The upper 32 bits of the vector are set to 0.
+///
+///    If the floating-point element is NaN or infinity, or if the
+///    floating-point element is greater than 0x7FFFFFFF or less than -0x80, it
+///    is converted to 0x80. Otherwise, if the floating-point element is greater
+///    than 0x7F, it is converted to 0x7F.
 ///
 /// \headerfile <x86intrin.h>
 ///
diff --git a/lib/Index/IndexBody.cpp b/lib/Index/IndexBody.cpp
index 7f09290de40f..08d233fb83e0 100644
--- a/lib/Index/IndexBody.cpp
+++ b/lib/Index/IndexBody.cpp
@@ -150,6 +150,53 @@ class BodyIndexer : public RecursiveASTVisitor<BodyIndexer> {
                                     Parent, ParentDC, Roles, Relations, E);
   }
 
+  bool indexDependentReference( // Index a dependent name via its template.
+      const Expr *E, const Type *T, const DeclarationNameInfo &NameInfo,
+      llvm::function_ref<bool(const NamedDecl *ND)> Filter) {
+    if (!T) // No type to resolve the name against: nothing to index.
+      return true;
+    const TemplateSpecializationType *TST =
+        T->getAs<TemplateSpecializationType>();
+    if (!TST) // Only template specialization types are handled.
+      return true;
+    TemplateName TN = TST->getTemplateName();
+    const ClassTemplateDecl *TD =
+        dyn_cast_or_null<ClassTemplateDecl>(TN.getAsTemplateDecl());
+    if (!TD) // Only class templates are handled.
+      return true;
+    CXXRecordDecl *RD = TD->getTemplatedDecl();
+    if (!RD->hasDefinition()) // The lookup below runs on the definition.
+      return true;
+    RD = RD->getDefinition();
+    std::vector<const NamedDecl *> Symbols =
+        RD->lookupDependentName(NameInfo.getName(), Filter);
+    // FIXME: Improve overload handling; only a unique match is indexed.
+    if (Symbols.size() != 1)
+      return true;
+    SourceLocation Loc = NameInfo.getLoc();
+    if (Loc.isInvalid()) // Fall back to the start of the expression.
+      Loc = E->getLocStart();
+    SmallVector<SymbolRelation, 4> Relations;
+    SymbolRoleSet Roles = getRolesForRef(E, Relations);
+    return IndexCtx.handleReference(Symbols[0], Loc, Parent, ParentDC, Roles,
+                                    Relations, E);
+  }
+
+  bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) {
+    const DeclarationNameInfo &Info = E->getMemberNameInfo();
+    return indexDependentReference( // Filter keeps instance members only.
+        E, E->getBaseType().getTypePtrOrNull(), Info,
+        [](const NamedDecl *D) { return D->isCXXInstanceMember(); });
+  }
+
+  bool VisitDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *E) {
+    const DeclarationNameInfo &Info = E->getNameInfo();
+    const NestedNameSpecifier *NNS = E->getQualifier();
+    return indexDependentReference( // Filter keeps non-instance members only.
+        E, NNS->getAsType(), Info, // NOTE(review): assumes NNS is non-null.
+        [](const NamedDecl *D) { return !D->isCXXInstanceMember(); });
+  }
+
   bool VisitDesignatedInitExpr(DesignatedInitExpr *E) {
     for (DesignatedInitExpr::Designator &D : llvm::reverse(E->designators())) {
       if (D.isFieldDesignator() && D.getField())
diff --git a/lib/Index/IndexDecl.cpp b/lib/Index/IndexDecl.cpp
index 7de70a10b692..203862c46e98 100644
--- a/lib/Index/IndexDecl.cpp
+++ b/lib/Index/IndexDecl.cpp
@@ -52,6 +52,22 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
     return MD && !MD->isImplicit() && MD->isThisDeclarationADefinition();
   }
 
+  void handleTemplateArgumentLoc(const TemplateArgumentLoc &TALoc,
+                                 const NamedDecl *Parent,
+                                 const DeclContext *DC) {
+    const TemplateArgumentLocInfo &LocInfo = TALoc.getLocInfo();
+    switch (TALoc.getArgument().getKind()) {
+    case TemplateArgument::Expression: // Index the argument expression.
+      IndexCtx.indexBody(LocInfo.getAsExpr(), Parent, DC);
+      break;
+    case TemplateArgument::Type: // Index the argument's type source info.
+      IndexCtx.indexTypeSourceInfo(LocInfo.getAsTypeSourceInfo(), Parent, DC);
+      break;
+    default: // Other argument kinds are not indexed here.
+      break;
+    }
+  }
+
   void handleDeclarator(const DeclaratorDecl *D,
                         const NamedDecl *Parent = nullptr,
                         bool isIBType = false) {
@@ -233,6 +249,12 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
                                  Dtor->getParent(), Dtor->getDeclContext());
       }
     }
+    // Template specialization arguments.
+    if (const ASTTemplateArgumentListInfo *TemplateArgInfo =
+            D->getTemplateSpecializationArgsAsWritten()) {
+      for (const auto &Arg : TemplateArgInfo->arguments())
+        handleTemplateArgumentLoc(Arg, D, D->getLexicalDeclContext());
+    }
 
     if (D->isThisDeclarationADefinition()) {
       const Stmt *Body = D->getBody();
@@ -522,6 +544,14 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
     return true;
   }
 
+  bool VisitNamespaceAliasDecl(const NamespaceAliasDecl *D) {
+    TRY_DECL(D, IndexCtx.handleDecl(D)); // The alias declaration itself.
+    IndexCtx.indexNestedNameSpecifierLoc(D->getQualifierLoc(), D);
+    IndexCtx.handleReference(D->getAliasedNamespace(), D->getTargetNameLoc(), D,
+                             D->getLexicalDeclContext()); // Aliased namespace.
+    return true; // NOTE(review): handleReference's result is ignored.
+  }
+
   bool VisitUsingDecl(const UsingDecl *D) {
     const DeclContext *DC = D->getDeclContext()->getRedeclContext();
     const NamedDecl *Parent = dyn_cast<NamedDecl>(DC);
diff --git a/lib/Index/IndexTypeSourceInfo.cpp b/lib/Index/IndexTypeSourceInfo.cpp
index 44d1241fb930..ae27ebe6ea4c 100644
--- a/lib/Index/IndexTypeSourceInfo.cpp
+++ b/lib/Index/IndexTypeSourceInfo.cpp
@@ -141,6 +141,34 @@ class TypeIndexer : public RecursiveASTVisitor<TypeIndexer> {
     return true;
   }
 
+  bool VisitDependentNameTypeLoc(DependentNameTypeLoc TL) {
+    const DependentNameType *DNT = TL.getTypePtr();
+    const NestedNameSpecifier *NNS = DNT->getQualifier();
+    const Type *T = NNS->getAsType(); // NOTE(review): assumes NNS is non-null.
+    if (!T) // Qualifier does not name a type: nothing to index.
+      return true;
+    const TemplateSpecializationType *TST =
+        T->getAs<TemplateSpecializationType>();
+    if (!TST) // Only template specialization types are handled.
+      return true;
+    TemplateName TN = TST->getTemplateName();
+    const ClassTemplateDecl *TD =
+        dyn_cast_or_null<ClassTemplateDecl>(TN.getAsTemplateDecl());
+    if (!TD) // Only class templates are handled.
+      return true;
+    CXXRecordDecl *RD = TD->getTemplatedDecl();
+    if (!RD->hasDefinition()) // The lookup below runs on the definition.
+      return true;
+    RD = RD->getDefinition();
+    DeclarationName Name(DNT->getIdentifier());
+    std::vector<const NamedDecl *> Symbols = RD->lookupDependentName(
+        Name, [](const NamedDecl *ND) { return isa<TypeDecl>(ND); });
+    if (Symbols.size() != 1) // Index only a unique type match.
+      return true;
+    return IndexCtx.handleReference(Symbols[0], TL.getNameLoc(), Parent,
+                                    ParentDC, SymbolRoleSet(), Relations);
+  }
+
   bool TraverseStmt(Stmt *S) {
     IndexCtx.indexBody(S, Parent, ParentDC);
     return true;
@@ -184,7 +212,7 @@ void IndexingContext::indexNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS,
 
   if (!DC)
     DC = Parent->getLexicalDeclContext();
-  SourceLocation Loc = NNS.getSourceRange().getBegin();
+  SourceLocation Loc = NNS.getLocalBeginLoc();
 
   switch (NNS.getNestedNameSpecifier()->getKind()) {
   case NestedNameSpecifier::Identifier:
diff --git a/lib/Index/IndexingContext.cpp b/lib/Index/IndexingContext.cpp
index 709a23657b07..5cebb198460f 100644
--- a/lib/Index/IndexingContext.cpp
+++ b/lib/Index/IndexingContext.cpp
@@ -124,6 +124,10 @@ bool IndexingContext::isTemplateImplicitInstantiation(const Decl *D) {
     TKind = FD->getTemplateSpecializationKind();
   } else if (auto *VD = dyn_cast<VarDecl>(D)) {
     TKind = VD->getTemplateSpecializationKind();
+  } else if (isa<FieldDecl>(D)) {
+    if (const auto *Parent =
+            dyn_cast<ClassTemplateSpecializationDecl>(D->getDeclContext()))
+      TKind = Parent->getSpecializationKind();
   }
   switch (TKind) {
     case TSK_Undeclared:
@@ -159,6 +163,17 @@ static const Decl *adjustTemplateImplicitInstantiation(const Decl *D) {
     return FD->getTemplateInstantiationPattern();
   } else if (auto *VD = dyn_cast<VarDecl>(D)) {
     return VD->getTemplateInstantiationPattern();
+  } else if (const auto *FD = dyn_cast<FieldDecl>(D)) {
+    if (const auto *Parent =
+            dyn_cast<ClassTemplateSpecializationDecl>(D->getDeclContext())) {
+      const CXXRecordDecl *Pattern = Parent->getTemplateInstantiationPattern();
+      for (const NamedDecl *ND : Pattern->lookup(FD->getDeclName())) {
+        if (ND->isImplicit())
+          continue;
+        if (isa<FieldDecl>(ND))
+          return ND;
+      }
+    }
   }
   return nullptr;
 }
diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp
index bec434085e3a..1e5deeb1919b 100644
--- a/lib/Lex/MacroInfo.cpp
+++ b/lib/Lex/MacroInfo.cpp
@@ -29,7 +29,6 @@ MacroInfo::MacroInfo(SourceLocation DefLoc)
     IsUsed(false),
     IsAllowRedefinitionsWithoutWarning(false),
     IsWarnIfUnused(false),
-    FromASTFile(false),
     UsedForHeaderGuard(false) {
 }
 
@@ -137,7 +136,6 @@ LLVM_DUMP_METHOD void MacroInfo::dump() const {
   if (IsAllowRedefinitionsWithoutWarning)
     Out << " allow_redefinitions_without_warning";
   if (IsWarnIfUnused) Out << " warn_if_unused";
-  if (FromASTFile) Out << " imported";
   if (UsedForHeaderGuard) Out << " header_guard";
 
   Out << "\n    #define <macro>";
diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp
index 70d37d3d7082..6f44dc757e85 100644
--- a/lib/Lex/ModuleMap.cpp
+++ b/lib/Lex/ModuleMap.cpp
@@ -1485,7 +1485,19 @@ void ModuleMapParser::parseModuleDecl() {
   
   // Determine whether this (sub)module has already been defined.
   if (Module *Existing = Map.lookupModuleQualified(ModuleName, ActiveModule)) {
-    if (Existing->DefinitionLoc.isInvalid() && !ActiveModule) {
+    // We might see a (re)definition of a module that we already have a
+    // definition for in two cases:
+    //  - If we loaded one definition from an AST file and we've just found a
+    //    corresponding definition in a module map file, or
+    bool LoadedFromASTFile = Existing->DefinitionLoc.isInvalid();
+    //  - If we're building a (preprocessed) module and we've just loaded the
+    //    module map file from which it was created.
+    bool ParsedAsMainInput =
+        Map.LangOpts.getCompilingModule() == LangOptions::CMK_ModuleMap &&
+        Map.LangOpts.CurrentModule == ModuleName &&
+        SourceMgr.getDecomposedLoc(ModuleNameLoc).first !=
+            SourceMgr.getDecomposedLoc(Existing->DefinitionLoc).first;
+    if (!ActiveModule && (LoadedFromASTFile || ParsedAsMainInput)) {
       // Skip the module definition.
       skipUntil(MMToken::RBrace);
       if (Tok.is(MMToken::RBrace))
@@ -1901,8 +1913,10 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken,
         // 'framework module FrameworkName.Private', since a 'Private.Framework'
         // does not usually exist. However, since both are currently widely used
         // for private modules, make sure we find the right path in both cases.
-        RelativePathName.resize(ActiveModule->IsFramework ? 0
-                                                          : RelativePathLength);
+        if (ActiveModule->IsFramework && ActiveModule->Name == "Private")
+          RelativePathName.clear();
+        else
+          RelativePathName.resize(RelativePathLength);
         FullPathName.resize(FullPathLength);
         llvm::sys::path::append(RelativePathName, "PrivateHeaders",
                                 Header.FileName);
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index 06fee8e5b0a8..faf8809e4eb4 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -54,35 +54,12 @@ using namespace clang;
 // Utility Methods for Preprocessor Directive Handling.
 //===----------------------------------------------------------------------===//
 
-MacroInfo *Preprocessor::AllocateMacroInfo() {
-  MacroInfoChain *MIChain = BP.Allocate<MacroInfoChain>();
-  MIChain->Next = MIChainHead;
+MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
+  auto *MIChain = new (BP) MacroInfoChain{L, MIChainHead}; // Next = old head.
   MIChainHead = MIChain;
   return &MIChain->MI;
 }
 
-MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
-  MacroInfo *MI = AllocateMacroInfo();
-  new (MI) MacroInfo(L);
-  return MI;
-}
-
-MacroInfo *Preprocessor::AllocateDeserializedMacroInfo(SourceLocation L,
-                                                       unsigned SubModuleID) {
-  static_assert(alignof(MacroInfo) >= sizeof(SubModuleID),
-                "alignment for MacroInfo is less than the ID");
-  DeserializedMacroInfoChain *MIChain =
-      BP.Allocate<DeserializedMacroInfoChain>();
-  MIChain->Next = DeserialMIChainHead;
-  DeserialMIChainHead = MIChain;
-
-  MacroInfo *MI = &MIChain->MI;
-  new (MI) MacroInfo(L);
-  MI->FromASTFile = true;
-  MI->setOwningModuleID(SubModuleID);
-  return MI;
-}
-
 DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
                                                            SourceLocation Loc) {
   return new (BP) DefMacroDirective(MI, Loc);
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index e409ab036535..dce8c1efda23 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -88,7 +88,7 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
       CurDirLookup(nullptr), CurLexerKind(CLK_Lexer),
       CurLexerSubmodule(nullptr), Callbacks(nullptr),
       CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
-      Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
+      Record(nullptr), MIChainHead(nullptr) {
   OwnsHeaderSearch = OwnsHeaders;
   
   CounterValue = 0; // __COUNTER__ starts at 0.
@@ -169,11 +169,6 @@ Preprocessor::~Preprocessor() {
   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
   CurTokenLexer.reset();
 
-  while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
-    DeserialMIChainHead = I->Next;
-    I->~DeserializedMacroInfoChain();
-  }
-
   // Free any cached MacroArgs.
   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
     ArgList = ArgList->deallocate();
diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp
index 1465d21ac5ee..4ccee74eaa90 100644
--- a/lib/Parse/ParseDecl.cpp
+++ b/lib/Parse/ParseDecl.cpp
@@ -2577,9 +2577,9 @@ bool Parser::ParseImplicitInt(DeclSpec &DS, CXXScopeSpec *SS,
   // and attempt to recover.
   ParsedType T;
   IdentifierInfo *II = Tok.getIdentifierInfo();
+  bool IsTemplateName = getLangOpts().CPlusPlus && NextToken().is(tok::less);
   Actions.DiagnoseUnknownTypeName(II, Loc, getCurScope(), SS, T,
-                                  getLangOpts().CPlusPlus &&
-                                      NextToken().is(tok::less));
+                                  IsTemplateName);
   if (T) {
     // The action has suggested that the type T could be used. Set that as
     // the type in the declaration specifiers, consume the would-be type
@@ -2604,6 +2604,13 @@ bool Parser::ParseImplicitInt(DeclSpec &DS, CXXScopeSpec *SS,
   DS.SetRangeEnd(Tok.getLocation());
   ConsumeToken();
 
+  // Eat any following template arguments.
+  if (IsTemplateName) {
+    SourceLocation LAngle, RAngle;
+    TemplateArgList Args;
+    ParseTemplateIdAfterTemplateName(true, LAngle, Args, RAngle);
+  }
+
   // TODO: Could inject an invalid typedef decl in an enclosing scope to
   // avoid rippling error messages on subsequent uses of the same type,
   // could be useful if #include was forgotten.
diff --git a/lib/Parse/ParseDeclCXX.cpp b/lib/Parse/ParseDeclCXX.cpp
index ad7b319676e9..e6cf65e36cfa 100644
--- a/lib/Parse/ParseDeclCXX.cpp
+++ b/lib/Parse/ParseDeclCXX.cpp
@@ -1137,8 +1137,8 @@ TypeResult Parser::ParseBaseTypeSpecifier(SourceLocation &BaseLoc,
     if (!Template) {
       TemplateArgList TemplateArgs;
       SourceLocation LAngleLoc, RAngleLoc;
-      ParseTemplateIdAfterTemplateName(nullptr, IdLoc, SS, true, LAngleLoc,
-                                       TemplateArgs, RAngleLoc);
+      ParseTemplateIdAfterTemplateName(true, LAngleLoc, TemplateArgs,
+                                       RAngleLoc);
       return true;
     }
 
@@ -1530,8 +1530,8 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
       // a class (or template thereof).
       TemplateArgList TemplateArgs;
       SourceLocation LAngleLoc, RAngleLoc;
-      if (ParseTemplateIdAfterTemplateName(
-              nullptr, NameLoc, SS, true, LAngleLoc, TemplateArgs, RAngleLoc)) {
+      if (ParseTemplateIdAfterTemplateName(true, LAngleLoc, TemplateArgs,
+                                           RAngleLoc)) {
         // We couldn't parse the template argument list at all, so don't
         // try to give any location information for the list.
         LAngleLoc = RAngleLoc = SourceLocation();
diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp
index 3e02e46ddc7d..727fd3500991 100644
--- a/lib/Parse/ParseExpr.cpp
+++ b/lib/Parse/ParseExpr.cpp
@@ -235,6 +235,30 @@ bool Parser::isNotExpressionStart() {
   return isKnownToBeDeclarationSpecifier();
 }
 
+/// Called after parsing a plausible template name (\p LHS) and a '<' at
+/// \p Less, when what follows cannot be an expression. If a closing '>',
+/// '>>' or '>>>' is found, diagnoses the intended template-id and returns
+/// true; otherwise rewinds the tentative parse and returns false.
+bool Parser::diagnoseUnknownTemplateId(ExprResult LHS, SourceLocation Less) {
+  TentativeParsingAction TPA(*this);
+  // FIXME: We could look at the token sequence in a lot more detail here.
+  if (SkipUntil(tok::greater, tok::greatergreater, tok::greatergreatergreater,
+                StopAtSemi | StopBeforeMatch)) {
+    TPA.Commit();
+
+    SourceLocation Greater;
+    ParseGreaterThanInTemplateList(Greater, true, false);
+    Actions.diagnoseExprIntendedAsTemplateName(getCurScope(), LHS,
+                                               Less, Greater);
+    return true;
+  }
+
+  // There's no matching '>' token, so this probably isn't supposed to be
+  // interpreted as a template-id. Parse it as an (ill-formed) comparison.
+  TPA.Revert();
+  return false;
+}
+
 static bool isFoldOperator(prec::Level Level) {
   return Level > prec::Unknown && Level != prec::Conditional;
 }
@@ -276,6 +300,16 @@ Parser::ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec) {
       return LHS;
     }
 
+    // If a '<' token is followed by a type that can be a template argument and
+    // cannot be an expression, then this is ill-formed, but might be intended
+    // to be a template-id.
+    if (OpToken.is(tok::less) && Actions.mightBeIntendedToBeTemplateName(LHS) &&
+        (isKnownToBeDeclarationSpecifier() ||
+         Tok.isOneOf(tok::greater, tok::greatergreater,
+                     tok::greatergreatergreater)) &&
+        diagnoseUnknownTemplateId(LHS, OpToken.getLocation()))
+      return ExprError();
+
     // If the next token is an ellipsis, then this is a fold-expression. Leave
     // it alone so we can handle it in the paren expression.
     if (isFoldOperator(NextTokPrec) && Tok.is(tok::ellipsis)) {
@@ -2989,6 +3023,11 @@ Optional<AvailabilitySpec> Parser::ParseAvailabilitySpec() {
     return AvailabilitySpec(ConsumeToken());
   } else {
     // Parse the platform name.
+    if (Tok.is(tok::code_completion)) {
+      Actions.CodeCompleteAvailabilityPlatformName();
+      cutOffParsing();
+      return None;
+    }
     if (Tok.isNot(tok::identifier)) {
       Diag(Tok, diag::err_avail_query_expected_platform_name);
       return None;
@@ -3001,12 +3040,14 @@ Optional<AvailabilitySpec> Parser::ParseAvailabilitySpec() {
     if (Version.empty())
       return None;
 
-    StringRef Platform = PlatformIdentifier->Ident->getName();
+    StringRef GivenPlatform = PlatformIdentifier->Ident->getName();
+    StringRef Platform =
+        AvailabilityAttr::canonicalizePlatformName(GivenPlatform);
 
     if (AvailabilityAttr::getPrettyPlatformName(Platform).empty()) {
       Diag(PlatformIdentifier->Loc,
            diag::err_avail_query_unrecognized_platform_name)
-          << Platform;
+          << GivenPlatform;
       return None;
     }
 
diff --git a/lib/Parse/ParseExprCXX.cpp b/lib/Parse/ParseExprCXX.cpp
index 671a815911f3..56093f685617 100644
--- a/lib/Parse/ParseExprCXX.cpp
+++ b/lib/Parse/ParseExprCXX.cpp
@@ -2114,11 +2114,8 @@ bool Parser::ParseUnqualifiedIdTemplateId(CXXScopeSpec &SS,
   // Parse the enclosed template argument list.
   SourceLocation LAngleLoc, RAngleLoc;
   TemplateArgList TemplateArgs;
-  if (Tok.is(tok::less) &&
-      ParseTemplateIdAfterTemplateName(Template, Id.StartLocation,
-                                       SS, true, LAngleLoc,
-                                       TemplateArgs,
-                                       RAngleLoc))
+  if (Tok.is(tok::less) && ParseTemplateIdAfterTemplateName(
+                               true, LAngleLoc, TemplateArgs, RAngleLoc))
     return true;
   
   if (Id.getKind() == UnqualifiedId::IK_Identifier ||
diff --git a/lib/Parse/ParseTemplate.cpp b/lib/Parse/ParseTemplate.cpp
index d2b18e7c0a81..6a81e14ed496 100644
--- a/lib/Parse/ParseTemplate.cpp
+++ b/lib/Parse/ParseTemplate.cpp
@@ -886,22 +886,12 @@ bool Parser::ParseGreaterThanInTemplateList(SourceLocation &RAngleLoc,
 /// list ('<' template-parameter-list [opt] '>') and placing the
 /// results into a form that can be transferred to semantic analysis.
 ///
-/// \param Template the template declaration produced by isTemplateName
-///
-/// \param TemplateNameLoc the source location of the template name
-///
-/// \param SS if non-NULL, the nested-name-specifier preceding the
-/// template name.
-///
 /// \param ConsumeLastToken if true, then we will consume the last
 /// token that forms the template-id. Otherwise, we will leave the
 /// last token in the stream (e.g., so that it can be replaced with an
 /// annotation token).
 bool
-Parser::ParseTemplateIdAfterTemplateName(TemplateTy Template,
-                                         SourceLocation TemplateNameLoc,
-                                         const CXXScopeSpec &SS,
-                                         bool ConsumeLastToken,
+Parser::ParseTemplateIdAfterTemplateName(bool ConsumeLastToken,
                                          SourceLocation &LAngleLoc,
                                          TemplateArgList &TemplateArgs,
                                          SourceLocation &RAngleLoc) {
@@ -983,9 +973,7 @@ bool Parser::AnnotateTemplateIdToken(TemplateTy Template, TemplateNameKind TNK,
   // Parse the enclosed template argument list.
   SourceLocation LAngleLoc, RAngleLoc;
   TemplateArgList TemplateArgs;
-  bool Invalid = ParseTemplateIdAfterTemplateName(Template, 
-                                                  TemplateNameLoc,
-                                                  SS, false, LAngleLoc,
+  bool Invalid = ParseTemplateIdAfterTemplateName(false, LAngleLoc,
                                                   TemplateArgs,
                                                   RAngleLoc);
 
diff --git a/lib/Sema/CMakeLists.txt b/lib/Sema/CMakeLists.txt
index 7a5973299f91..7d9ae621c93d 100644
--- a/lib/Sema/CMakeLists.txt
+++ b/lib/Sema/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
   )
 
 if (MSVC)
+  set_source_files_properties(SemaDeclAttr.cpp PROPERTIES COMPILE_FLAGS /bigobj)
   set_source_files_properties(SemaExpr.cpp PROPERTIES COMPILE_FLAGS /bigobj)
 endif()
 
diff --git a/lib/Sema/Sema.cpp b/lib/Sema/Sema.cpp
index 2f493fa5fbef..ca1d27e9505f 100644
--- a/lib/Sema/Sema.cpp
+++ b/lib/Sema/Sema.cpp
@@ -477,6 +477,13 @@ static bool ShouldRemoveFromUnused(Sema *SemaRef, const DeclaratorDecl *D) {
     return true;
 
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+    // If this is a function template and none of its specializations is used,
+    // we should warn.
+    if (FunctionTemplateDecl *Template = FD->getDescribedFunctionTemplate())
+      for (const auto *Spec : Template->specializations())
+        if (ShouldRemoveFromUnused(SemaRef, Spec))
+          return true;
+
     // UnusedFileScopedDecls stores the first declaration.
     // The declaration may have become definition so check again.
     const FunctionDecl *DeclToCheck;
@@ -500,6 +507,13 @@ static bool ShouldRemoveFromUnused(Sema *SemaRef, const DeclaratorDecl *D) {
         VD->isUsableInConstantExpressions(SemaRef->Context))
       return true;
 
+    if (VarTemplateDecl *Template = VD->getDescribedVarTemplate())
+      // If this is a variable template and none of its specializations is used,
+      // we should warn.
+      for (const auto *Spec : Template->specializations())
+        if (ShouldRemoveFromUnused(SemaRef, Spec))
+          return true;
+
     // UnusedFileScopedDecls stores the first declaration.
     // The declaration may have become definition so check again.
     const VarDecl *DeclToCheck = VD->getDefinition();
@@ -905,10 +919,14 @@ void Sema::ActOnEndOfTranslationUnit() {
                    << /*function*/0 << DiagD->getDeclName();
           }
         } else {
-          Diag(DiagD->getLocation(),
-               isa<CXXMethodDecl>(DiagD) ? diag::warn_unused_member_function
-                                         : diag::warn_unused_function)
-                << DiagD->getDeclName();
+          if (FD->getDescribedFunctionTemplate())
+            Diag(DiagD->getLocation(), diag::warn_unused_template)
+              << /*function*/0 << DiagD->getDeclName();
+          else
+            Diag(DiagD->getLocation(),
+                 isa<CXXMethodDecl>(DiagD) ? diag::warn_unused_member_function
+                                           : diag::warn_unused_function)
+              << DiagD->getDeclName();
         }
       } else {
         const VarDecl *DiagD = cast<VarDecl>(*I)->getDefinition();
@@ -924,7 +942,11 @@ void Sema::ActOnEndOfTranslationUnit() {
             Diag(DiagD->getLocation(), diag::warn_unused_const_variable)
                 << DiagD->getDeclName();
         } else {
-          Diag(DiagD->getLocation(), diag::warn_unused_variable)
+          if (DiagD->getDescribedVarTemplate())
+            Diag(DiagD->getLocation(), diag::warn_unused_template)
+              << /*variable*/1 << DiagD->getDeclName();
+          else
+            Diag(DiagD->getLocation(), diag::warn_unused_variable)
               << DiagD->getDeclName();
         }
       }
diff --git a/lib/Sema/SemaCast.cpp b/lib/Sema/SemaCast.cpp
index 7e91709e67da..7d534263f468 100644
--- a/lib/Sema/SemaCast.cpp
+++ b/lib/Sema/SemaCast.cpp
@@ -1871,7 +1871,8 @@ static bool fixOverloadedReinterpretCastExpr(Sema &Self, QualType DestType,
   // No guarantees that ResolveAndFixSingleFunctionTemplateSpecialization
   // preserves Result.
   Result = E;
-  if (!Self.resolveAndFixAddressOfOnlyViableOverloadCandidate(Result))
+  if (!Self.resolveAndFixAddressOfOnlyViableOverloadCandidate(
+          Result, /*DoFunctionPointerConversion=*/true))
     return false;
   return Result.isUsable();
 }
diff --git a/lib/Sema/SemaCodeComplete.cpp b/lib/Sema/SemaCodeComplete.cpp
index cfac3f1dc1de..8fb2f4139236 100644
--- a/lib/Sema/SemaCodeComplete.cpp
+++ b/lib/Sema/SemaCodeComplete.cpp
@@ -3869,6 +3869,41 @@ static void AddObjCProperties(
   }
 }
 
+/// Add completions for the members of \p RD, reached through a base object of
+/// type \p BaseType, plus the "template" keyword when the access is dependent.
+static void AddRecordMembersCompletionResults(Sema &SemaRef,
+                                              ResultBuilder &Results, Scope *S,
+                                              QualType BaseType,
+                                              RecordDecl *RD) {
+  // Record the member access and the base object's cv-qualifiers.
+  Results.setObjectTypeQualifiers(BaseType.getQualifiers());
+
+  // Collect the members of the C/C++ class, struct, or union.
+  Results.allowNestedNameSpecifiers();
+  CodeCompletionDeclConsumer Consumer(Results, SemaRef.CurContext);
+  SemaRef.LookupVisibleDecls(RD, Sema::LookupMemberName, Consumer,
+                             SemaRef.CodeCompleter->includeGlobals(),
+                             /*IncludeDependentBases=*/true);
+
+  // The "template" keyword can follow "->" or "." in the grammar, but it is
+  // only suggested in C++ when there are results and something is dependent.
+  if (!SemaRef.getLangOpts().CPlusPlus || Results.empty())
+    return;
+
+  bool SuggestTemplate = BaseType->isDependentType();
+  if (!SuggestTemplate) {
+    // Otherwise the nearest enclosing scope that has an entity decides.
+    Scope *Cur = S;
+    while (Cur && !Cur->getEntity())
+      Cur = Cur->getParent();
+    if (Cur)
+      SuggestTemplate = Cur->getEntity()->isDependentContext();
+  }
+
+  if (SuggestTemplate)
+    Results.AddResult(CodeCompletionResult("template"));
+}
+
 void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base,
                                            SourceLocation OpLoc, bool IsArrow,
                                            bool IsBaseExprStatement) {
@@ -3879,8 +3914,6 @@ void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base,
   if (ConvertedBase.isInvalid())
     return;
   Base = ConvertedBase.get();
-
-  typedef CodeCompletionResult Result;
   
   QualType BaseType = Base->getType();
 
@@ -3915,34 +3948,18 @@ void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base,
                         &ResultBuilder::IsMember);
   Results.EnterNewScope();
   if (const RecordType *Record = BaseType->getAs<RecordType>()) {
-    // Indicate that we are performing a member access, and the cv-qualifiers
-    // for the base object type.
-    Results.setObjectTypeQualifiers(BaseType.getQualifiers());
-    
-    // Access to a C/C++ class, struct, or union.
-    Results.allowNestedNameSpecifiers();
-    CodeCompletionDeclConsumer Consumer(Results, CurContext);
-    LookupVisibleDecls(Record->getDecl(), LookupMemberName, Consumer,
-                       CodeCompleter->includeGlobals());
-
-    if (getLangOpts().CPlusPlus) {
-      if (!Results.empty()) {
-        // The "template" keyword can follow "->" or "." in the grammar.
-        // However, we only want to suggest the template keyword if something
-        // is dependent.
-        bool IsDependent = BaseType->isDependentType();
-        if (!IsDependent) {
-          for (Scope *DepScope = S; DepScope; DepScope = DepScope->getParent())
-            if (DeclContext *Ctx = DepScope->getEntity()) {
-              IsDependent = Ctx->isDependentContext();
-              break;
-            }
-        }
-
-        if (IsDependent)
-          Results.AddResult(Result("template"));
-      }
+    AddRecordMembersCompletionResults(*this, Results, S, BaseType,
+                                      Record->getDecl());
+  } else if (const auto *TST = BaseType->getAs<TemplateSpecializationType>()) {
+    TemplateName TN = TST->getTemplateName();
+    if (const auto *TD =
+            dyn_cast_or_null<ClassTemplateDecl>(TN.getAsTemplateDecl())) {
+      CXXRecordDecl *RD = TD->getTemplatedDecl();
+      AddRecordMembersCompletionResults(*this, Results, S, BaseType, RD);
     }
+  } else if (const auto *ICNT = BaseType->getAs<InjectedClassNameType>()) {
+    if (auto *RD = ICNT->getDecl())
+      AddRecordMembersCompletionResults(*this, Results, S, BaseType, RD);
   } else if (!IsArrow && BaseType->isObjCObjectPointerType()) {
     // Objective-C property reference.
     AddedPropertiesSet AddedProperties;
@@ -7811,6 +7828,23 @@ void Sema::CodeCompleteNaturalLanguage() {
                             nullptr, 0);
 }
 
+/// Complete platform names, each plain and with "ApplicationExtension" added.
+void Sema::CodeCompleteAvailabilityPlatformName() {
+  const auto Kind = CodeCompletionContext::CCC_Other;
+  ResultBuilder Results(*this, CodeCompleter->getAllocator(),
+                        CodeCompleter->getCodeCompletionTUInfo(), Kind);
+  Results.EnterNewScope();
+  static const char *Platforms[] = {"macOS", "iOS", "watchOS", "tvOS"};
+  for (const char *Name : Platforms) {
+    Results.AddResult(CodeCompletionResult(Name));
+    Results.AddResult(CodeCompletionResult(Results.getAllocator().CopyString(
+        Twine(Name) + "ApplicationExtension")));
+  }
+  Results.ExitScope();
+  HandleCodeCompleteResults(this, CodeCompleter, Kind, Results.data(),
+                            Results.size());
+}
+
 void Sema::GatherGlobalCodeCompletions(CodeCompletionAllocator &Allocator,
                                        CodeCompletionTUInfo &CCTUInfo,
                  SmallVectorImpl<CodeCompletionResult> &Results) {
diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp
index 2612023f59db..2e069a9defaa 100644
--- a/lib/Sema/SemaDecl.cpp
+++ b/lib/Sema/SemaDecl.cpp
@@ -64,22 +64,45 @@ namespace {
 
 class TypeNameValidatorCCC : public CorrectionCandidateCallback {
  public:
-  TypeNameValidatorCCC(bool AllowInvalid, bool WantClass=false,
-                       bool AllowTemplates=false)
-      : AllowInvalidDecl(AllowInvalid), WantClassName(WantClass),
-        AllowTemplates(AllowTemplates) {
-    WantExpressionKeywords = false;
-    WantCXXNamedCasts = false;
-    WantRemainingKeywords = false;
+   TypeNameValidatorCCC(bool AllowInvalid, bool WantClass = false,
+                        bool AllowTemplates = false,
+                        bool AllowNonTemplates = true)
+       : AllowInvalidDecl(AllowInvalid), WantClassName(WantClass),
+         AllowTemplates(AllowTemplates), AllowNonTemplates(AllowNonTemplates) {
+     WantExpressionKeywords = false;
+     WantCXXNamedCasts = false;
+     WantRemainingKeywords = false;
   }
 
   bool ValidateCandidate(const TypoCorrection &candidate) override {
     if (NamedDecl *ND = candidate.getCorrectionDecl()) {
+      if (!AllowInvalidDecl && ND->isInvalidDecl())
+        return false;
+
+      if (getAsTypeTemplateDecl(ND))
+        return AllowTemplates;
+
       bool IsType = isa<TypeDecl>(ND) || isa<ObjCInterfaceDecl>(ND);
-      bool AllowedTemplate = AllowTemplates && getAsTypeTemplateDecl(ND);
-      return (IsType || AllowedTemplate) &&
-             (AllowInvalidDecl || !ND->isInvalidDecl());
+      if (!IsType)
+        return false;
+
+      if (AllowNonTemplates)
+        return true;
+
+      // An injected-class-name of a class template (specialization) is valid
+      // as a template or as a non-template.
+      if (AllowTemplates) {
+        auto *RD = dyn_cast<CXXRecordDecl>(ND);
+        if (!RD || !RD->isInjectedClassName())
+          return false;
+        RD = cast<CXXRecordDecl>(RD->getDeclContext());
+        return RD->getDescribedClassTemplate() ||
+               isa<ClassTemplateSpecializationDecl>(RD);
+      }
+
+      return false;
     }
+
     return !WantClassName && candidate.isKeyword();
   }
 
@@ -87,6 +110,7 @@ class TypeNameValidatorCCC : public CorrectionCandidateCallback {
   bool AllowInvalidDecl;
   bool WantClassName;
   bool AllowTemplates;
+  bool AllowNonTemplates;
 };
 
 } // end anonymous namespace
@@ -627,7 +651,7 @@ void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II,
                                    Scope *S,
                                    CXXScopeSpec *SS,
                                    ParsedType &SuggestedType,
-                                   bool AllowClassTemplates) {
+                                   bool IsTemplateName) {
   // Don't report typename errors for editor placeholders.
   if (II->isEditorPlaceholder())
     return;
@@ -639,28 +663,41 @@ void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II,
   if (TypoCorrection Corrected =
           CorrectTypo(DeclarationNameInfo(II, IILoc), LookupOrdinaryName, S, SS,
                       llvm::make_unique<TypeNameValidatorCCC>(
-                          false, false, AllowClassTemplates),
+                          false, false, IsTemplateName, !IsTemplateName),
                       CTK_ErrorRecovery)) {
+    // FIXME: Support error recovery for the template-name case.
+    bool CanRecover = !IsTemplateName;
     if (Corrected.isKeyword()) {
       // We corrected to a keyword.
-      diagnoseTypo(Corrected, PDiag(diag::err_unknown_typename_suggest) << II);
+      diagnoseTypo(Corrected,
+                   PDiag(IsTemplateName ? diag::err_no_template_suggest
+                                        : diag::err_unknown_typename_suggest)
+                       << II);
       II = Corrected.getCorrectionAsIdentifierInfo();
     } else {
       // We found a similarly-named type or interface; suggest that.
       if (!SS || !SS->isSet()) {
         diagnoseTypo(Corrected,
-                     PDiag(diag::err_unknown_typename_suggest) << II);
+                     PDiag(IsTemplateName ? diag::err_no_template_suggest
+                                          : diag::err_unknown_typename_suggest)
+                         << II, CanRecover);
       } else if (DeclContext *DC = computeDeclContext(*SS, false)) {
         std::string CorrectedStr(Corrected.getAsString(getLangOpts()));
         bool DroppedSpecifier = Corrected.WillReplaceSpecifier() &&
                                 II->getName().equals(CorrectedStr);
         diagnoseTypo(Corrected,
-                     PDiag(diag::err_unknown_nested_typename_suggest)
-                       << II << DC << DroppedSpecifier << SS->getRange());
+                     PDiag(IsTemplateName
+                               ? diag::err_no_member_template_suggest
+                               : diag::err_unknown_nested_typename_suggest)
+                         << II << DC << DroppedSpecifier << SS->getRange(),
+                     CanRecover);
       } else {
         llvm_unreachable("could not have corrected a typo here");
       }
 
+      if (!CanRecover)
+        return;
+
       CXXScopeSpec tmpSS;
       if (Corrected.getCorrectionSpecifier())
         tmpSS.MakeTrivial(Context, Corrected.getCorrectionSpecifier(),
@@ -675,7 +712,7 @@ void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II,
     return;
   }
 
-  if (getLangOpts().CPlusPlus) {
+  if (getLangOpts().CPlusPlus && !IsTemplateName) {
     // See if II is a class template that the user forgot to pass arguments to.
     UnqualifiedId Name;
     Name.setIdentifier(II, IILoc);
@@ -700,10 +737,13 @@ void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II,
   // (struct, union, enum) from Parser::ParseImplicitInt here, instead?
 
   if (!SS || (!SS->isSet() && !SS->isInvalid()))
-    Diag(IILoc, diag::err_unknown_typename) << II;
+    Diag(IILoc, IsTemplateName ? diag::err_no_template
+                               : diag::err_unknown_typename)
+        << II;
   else if (DeclContext *DC = computeDeclContext(*SS, false))
-    Diag(IILoc, diag::err_typename_nested_not_found)
-      << II << DC << SS->getRange();
+    Diag(IILoc, IsTemplateName ? diag::err_no_member_template
+                               : diag::err_typename_nested_not_found)
+        << II << DC << SS->getRange();
   else if (isDependentScopeSpecifier(*SS)) {
     unsigned DiagID = diag::err_typename_missing;
     if (getLangOpts().MSVCCompat && isMicrosoftMissingTypename(SS, S))
@@ -1488,6 +1528,11 @@ bool Sema::ShouldWarnIfUnusedFileScopedDecl(const DeclaratorDecl *D) const {
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
     if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
       return false;
+    // A non-out-of-line declaration of a member specialization was implicitly
+    // instantiated; it's the out-of-line declaration that we're interested in.
+    if (FD->getTemplateSpecializationKind() == TSK_ExplicitSpecialization &&
+        FD->getMemberSpecializationInfo() && !FD->isOutOfLine())
+      return false;
 
     if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD)) {
       if (MD->isVirtual() || IsDisallowedCopyOrAssign(MD))
@@ -1514,6 +1559,10 @@ bool Sema::ShouldWarnIfUnusedFileScopedDecl(const DeclaratorDecl *D) const {
     if (VD->isStaticDataMember() &&
         VD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
       return false;
+    if (VD->isStaticDataMember() &&
+        VD->getTemplateSpecializationKind() == TSK_ExplicitSpecialization &&
+        VD->getMemberSpecializationInfo() && !VD->isOutOfLine())
+      return false;
 
     if (VD->isInline() && !isMainFileLoc(*this, VD->getLocation()))
       return false;
@@ -1972,7 +2021,7 @@ bool Sema::isIncompatibleTypedef(TypeDecl *Old, TypedefNameDecl *New) {
     Diag(New->getLocation(), diag::err_redefinition_variably_modified_typedef)
       << Kind << NewType;
     if (Old->getLocation().isValid())
-      Diag(Old->getLocation(), diag::note_previous_definition);
+      notePreviousDefinition(Old->getLocation(), New->getLocation());
     New->setInvalidDecl();
     return true;
   }
@@ -1985,7 +2034,7 @@ bool Sema::isIncompatibleTypedef(TypeDecl *Old, TypedefNameDecl *New) {
     Diag(New->getLocation(), diag::err_redefinition_different_typedef)
       << Kind << NewType << OldType;
     if (Old->getLocation().isValid())
-      Diag(Old->getLocation(), diag::note_previous_definition);
+      notePreviousDefinition(Old->getLocation(), New->getLocation());
     New->setInvalidDecl();
     return true;
   }
@@ -2052,7 +2101,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
 
     NamedDecl *OldD = OldDecls.getRepresentativeDecl();
     if (OldD->getLocation().isValid())
-      Diag(OldD->getLocation(), diag::note_previous_definition);
+      notePreviousDefinition(OldD->getLocation(), New->getLocation());
 
     return New->setInvalidDecl();
   }
@@ -2078,7 +2127,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
         New->setTypeSourceInfo(OldTD->getTypeSourceInfo());
 
       // Make the old tag definition visible.
-      makeMergedDefinitionVisible(Hidden, NewTag->getLocation());
+      makeMergedDefinitionVisible(Hidden);
 
       // If this was an unscoped enumeration, yank all of its enumerators
       // out of the scope.
@@ -2144,7 +2193,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
 
     Diag(New->getLocation(), diag::err_redefinition)
       << New->getDeclName();
-    Diag(Old->getLocation(), diag::note_previous_definition);
+    notePreviousDefinition(Old->getLocation(), New->getLocation());
     return New->setInvalidDecl();
   }
 
@@ -2165,7 +2214,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
 
   Diag(New->getLocation(), diag::ext_redefinition_of_typedef)
     << New->getDeclName();
-  Diag(Old->getLocation(), diag::note_previous_definition);
+  notePreviousDefinition(Old->getLocation(), New->getLocation());
 }
 
 /// DeclhasAttr - returns true if decl Declaration already has the target
@@ -2452,7 +2501,10 @@ static void checkNewAttributesAfterDef(Sema &S, Decl *New, const Decl *Old) {
                             ? diag::err_alias_after_tentative
                             : diag::err_redefinition;
         S.Diag(VD->getLocation(), Diag) << VD->getDeclName();
-        S.Diag(Def->getLocation(), diag::note_previous_definition);
+        if (Diag == diag::err_redefinition)
+          S.notePreviousDefinition(Def->getLocation(), VD->getLocation());
+        else
+          S.Diag(Def->getLocation(), diag::note_previous_definition);
         VD->setInvalidDecl();
       }
       ++I;
@@ -2839,7 +2891,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
     } else {
       Diag(New->getLocation(), diag::err_redefinition_different_kind)
         << New->getDeclName();
-      Diag(OldD->getLocation(), diag::note_previous_definition);
+      notePreviousDefinition(OldD->getLocation(), New->getLocation());
       return true;
     }
   }
@@ -2876,7 +2928,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
       !Old->hasAttr<InternalLinkageAttr>()) {
     Diag(New->getLocation(), diag::err_internal_linkage_redeclaration)
         << New->getDeclName();
-    Diag(Old->getLocation(), diag::note_previous_definition);
+    notePreviousDefinition(Old->getLocation(), New->getLocation());
     New->dropAttr<InternalLinkageAttr>();
   }
 
@@ -3604,9 +3656,9 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
   }
   if (!Old) {
     Diag(New->getLocation(), diag::err_redefinition_different_kind)
-      << New->getDeclName();
-    Diag(Previous.getRepresentativeDecl()->getLocation(),
-         diag::note_previous_definition);
+        << New->getDeclName();
+    notePreviousDefinition(Previous.getRepresentativeDecl()->getLocation(),
+                           New->getLocation());
     return New->setInvalidDecl();
   }
 
@@ -3635,7 +3687,7 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
       Old->getStorageClass() == SC_None &&
       !Old->hasAttr<WeakImportAttr>()) {
     Diag(New->getLocation(), diag::warn_weak_import) << New->getDeclName();
-    Diag(Old->getLocation(), diag::note_previous_definition);
+    notePreviousDefinition(Old->getLocation(), New->getLocation());
     // Remove weak_import attribute on new declaration.
     New->dropAttr<WeakImportAttr>();
   }
@@ -3644,7 +3696,7 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
       !Old->hasAttr<InternalLinkageAttr>()) {
     Diag(New->getLocation(), diag::err_internal_linkage_redeclaration)
         << New->getDeclName();
-    Diag(Old->getLocation(), diag::note_previous_definition);
+    notePreviousDefinition(Old->getLocation(), New->getLocation());
     New->dropAttr<InternalLinkageAttr>();
   }
 
@@ -3801,6 +3853,67 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
     New->setImplicitlyInline();
 }
 
+/// Emit the note that follows a redefinition diagnostic. If \p Old and \p New
+/// are the same offset in the same file, prefer notes naming the includes (or
+/// module) involved; otherwise point at the previous definition at \p Old.
+void Sema::notePreviousDefinition(SourceLocation Old, SourceLocation New) {
+  SourceManager &SM = getSourceManager();
+  auto NewPos = SM.getDecomposedLoc(New);
+  auto OldPos = SM.getDecomposedLoc(Old);
+  auto *NewFile = SM.getFileEntryForID(NewPos.first);
+  auto *OldFile = SM.getFileEntryForID(OldPos.first);
+  auto &HeaderInfo = PP.getHeaderSearchInfo();
+  StringRef Header = SM.getFilename(SM.getSpellingLoc(Old));
+
+  // Redefinition errors with modules are common with non-modular mapped
+  // headers, e.g. a non-modular header H in module A that is also included
+  // directly in a TU. Pointing twice at the same header/definition is
+  // confusing, so name the include (and, with modules on, the module) at
+  // IncLoc instead. Returns true if a note was emitted.
+  auto NoteInclusion = [&](SourceLocation &Loc,
+                           SourceLocation &IncLoc) -> bool {
+    Module *Owner = nullptr;
+    if (getLangOpts().Modules &&
+        !SM.getModuleImportLoc(Old).first.isInvalid())
+      Owner = HeaderInfo.getModuleMap().inferModuleFromLocation(
+          FullSourceLoc(Loc, SM));
+
+    if (!IncLoc.isValid())
+      return false;
+
+    if (!Owner) {
+      Diag(IncLoc, diag::note_redefinition_include_same_file) << Header.str();
+      return true;
+    }
+
+    Diag(IncLoc, diag::note_redefinition_modules_same_file)
+        << Header.str() << Owner->getFullModuleName();
+    if (!Owner->DefinitionLoc.isInvalid())
+      Diag(Owner->DefinitionLoc, diag::note_defined_here)
+          << Owner->getFullModuleName();
+    return true;
+  };
+
+  // Is it the same file and same offset? Provide more information on why
+  // this leads to a redefinition error.
+  if (NewFile == OldFile && NewPos.second == OldPos.second) {
+    SourceLocation OldIncLoc = SM.getIncludeLoc(OldPos.first);
+    SourceLocation NewIncLoc = SM.getIncludeLoc(NewPos.first);
+    bool NotedOld = NoteInclusion(Old, OldIncLoc);
+    bool NotedNew = NoteInclusion(New, NewIncLoc);
+
+    // If the header has no guards, emit a note suggesting one.
+    if (OldFile && !HeaderInfo.isFileMultipleIncludeGuarded(OldFile))
+      Diag(Old, diag::note_use_ifdef_guards);
+
+    if (NotedOld || NotedNew)
+      return;
+  }
+
+  // Redefinition coming from different files or couldn't do better above.
+  Diag(Old, diag::note_previous_definition);
+}
+
 /// We've just determined that \p Old and \p New both appear to be definitions
 /// of the same variable. Either diagnose or fix the problem.
 bool Sema::checkVarDeclRedefinition(VarDecl *Old, VarDecl *New) {
@@ -3816,12 +3929,12 @@ bool Sema::checkVarDeclRedefinition(VarDecl *Old, VarDecl *New) {
 
     // Make the canonical definition visible.
     if (auto *OldTD = Old->getDescribedVarTemplate())
-      makeMergedDefinitionVisible(OldTD, New->getLocation());
-    makeMergedDefinitionVisible(Old, New->getLocation());
+      makeMergedDefinitionVisible(OldTD);
+    makeMergedDefinitionVisible(Old);
     return false;
   } else {
     Diag(New->getLocation(), diag::err_redefinition) << New;
-    Diag(Old->getLocation(), diag::note_previous_definition);
+    notePreviousDefinition(Old->getLocation(), New->getLocation());
     New->setInvalidDecl();
     return true;
   }
@@ -6706,6 +6819,9 @@ NamedDecl *Sema::ActOnVariableDeclarator(
     return NewTemplate;
   }
 
+  if (IsMemberSpecialization && !NewVD->isInvalidDecl())
+    CompleteMemberSpecialization(NewVD, Previous);
+
   return NewVD;
 }
 
@@ -8919,12 +9035,17 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
     }
   }
 
+  MarkUnusedFileScopedDecl(NewFD);
+
   if (getLangOpts().CPlusPlus) {
     if (FunctionTemplate) {
       if (NewFD->isInvalidDecl())
         FunctionTemplate->setInvalidDecl();
       return FunctionTemplate;
     }
+
+    if (isMemberSpecialization && !NewFD->isInvalidDecl())
+      CompleteMemberSpecialization(NewFD, Previous);
   }
 
   if (NewFD->hasAttr<OpenCLKernelAttr>()) {
@@ -8964,8 +9085,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
     }
   }
 
-  MarkUnusedFileScopedDecl(NewFD);
-
   // Here we have an function template explicit specialization at class scope.
   // The actually specialization will be postponed to template instatiation
   // time via the ClassScopeFunctionSpecializationDecl node.
@@ -9182,7 +9301,9 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
         if (OldTemplateDecl->getTemplatedDecl()->isDeleted()) {
           FunctionDecl *const OldTemplatedDecl =
               OldTemplateDecl->getTemplatedDecl();
+          // FIXME: This assert will not hold in the presence of modules.
           assert(OldTemplatedDecl->getCanonicalDecl() == OldTemplatedDecl);
+          // FIXME: We need an update record for this AST mutation.
           OldTemplatedDecl->setDeletedAsWritten(false);
         }
       }
@@ -10273,23 +10394,36 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
   VDecl->setInit(Init);
 
   if (VDecl->isLocalVarDecl()) {
+    // Don't check the initializer if the declaration is malformed.
+    if (VDecl->isInvalidDecl()) {
+      // do nothing
+
+    // OpenCL v1.2 s6.5.3: __constant locals must be constant-initialized.
+    // This is true even in OpenCL C++.
+    } else if (VDecl->getType().getAddressSpace() == LangAS::opencl_constant) {
+      CheckForConstantInitializer(Init, DclT);
+
+    // Otherwise, C++ does not restrict the initializer.
+    } else if (getLangOpts().CPlusPlus) {
+      // do nothing
+
     // C99 6.7.8p4: All the expressions in an initializer for an object that has
     // static storage duration shall be constant expressions or string literals.
-    // C++ does not have this restriction.
-    if (!getLangOpts().CPlusPlus && !VDecl->isInvalidDecl()) {
+    } else if (VDecl->getStorageClass() == SC_Static) {
+      CheckForConstantInitializer(Init, DclT);
+
+    // C89 is stricter than C99 for aggregate initializers.
+    // C89 6.5.7p3: All the expressions [...] in an initializer list
+    // for an object that has aggregate or union type shall be
+    // constant expressions.
+    } else if (!getLangOpts().C99 && VDecl->getType()->isAggregateType() &&
+               isa<InitListExpr>(Init)) {
       const Expr *Culprit;
-      if (VDecl->getStorageClass() == SC_Static)
-        CheckForConstantInitializer(Init, DclT);
-      // C89 is stricter than C99 for non-static aggregate types.
-      // C89 6.5.7p3: All the expressions [...] in an initializer list
-      // for an object that has aggregate or union type shall be
-      // constant expressions.
-      else if (!getLangOpts().C99 && VDecl->getType()->isAggregateType() &&
-               isa<InitListExpr>(Init) &&
-               !Init->isConstantInitializer(Context, false, &Culprit))
+      if (!Init->isConstantInitializer(Context, false, &Culprit)) {
         Diag(Culprit->getExprLoc(),
              diag::ext_aggregate_init_not_constant)
           << Culprit->getSourceRange();
+      }
     }
   } else if (VDecl->isStaticDataMember() && !VDecl->isInline() &&
              VDecl->getLexicalDeclContext()->isRecord()) {
@@ -11023,8 +11157,7 @@ static bool hasDependentAlignment(VarDecl *VD) {
 
 /// FinalizeDeclaration - called by ParseDeclarationAfterDeclarator to perform
 /// any semantic actions necessary after any initializer has been attached.
-void
-Sema::FinalizeDeclaration(Decl *ThisDecl) {
+void Sema::FinalizeDeclaration(Decl *ThisDecl) {
   // Note that we are no longer parsing the initializer for this declaration.
   ParsingInitForAutoVars.erase(ThisDecl);
 
@@ -11189,9 +11322,8 @@ Sema::FinalizeDeclaration(Decl *ThisDecl) {
   if (DC->getRedeclContext()->isFileContext() && VD->isExternallyVisible())
     AddPushedVisibilityAttribute(VD);
 
-  // FIXME: Warn on unused templates.
-  if (VD->isFileVarDecl() && !VD->getDescribedVarTemplate() &&
-      !isa<VarTemplatePartialSpecializationDecl>(VD))
+  // FIXME: Warn on unused var template partial specializations.
+  if (VD->isFileVarDecl() && !isa<VarTemplatePartialSpecializationDecl>(VD))
     MarkUnusedFileScopedDecl(VD);
 
   // Now we have parsed the initializer and can update the table of magic
@@ -11765,9 +11897,8 @@ Sema::CheckForFunctionRedefinition(FunctionDecl *FD,
        Definition->getNumTemplateParameterLists())) {
     SkipBody->ShouldSkip = true;
     if (auto *TD = Definition->getDescribedFunctionTemplate())
-      makeMergedDefinitionVisible(TD, FD->getLocation());
-    makeMergedDefinitionVisible(const_cast<FunctionDecl*>(Definition),
-                                FD->getLocation());
+      makeMergedDefinitionVisible(TD);
+    makeMergedDefinitionVisible(const_cast<FunctionDecl*>(Definition));
     return;
   }
 
@@ -13421,7 +13552,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
                 // we already have. Make the existing definition visible and
                 // use it in place of this one.
                 SkipBody->ShouldSkip = true;
-                makeMergedDefinitionVisible(Hidden, KWLoc);
+                makeMergedDefinitionVisible(Hidden);
                 return Def;
               } else if (!IsExplicitSpecializationAfterInstantiation) {
                 // A redeclaration in function prototype scope in C isn't
@@ -13430,7 +13561,8 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
                   Diag(NameLoc, diag::warn_redefinition_in_param_list) << Name;
                 else
                   Diag(NameLoc, diag::err_redefinition) << Name;
-                Diag(Def->getLocation(), diag::note_previous_definition);
+                notePreviousDefinition(Def->getLocation(),
+                                       NameLoc.isValid() ? NameLoc : KWLoc);
                 // If this is a redefinition, recover by making this
                 // struct be anonymous, which will make any later
                 // references get the previous definition.
@@ -13520,7 +13652,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
         // The tag name clashes with something else in the target scope,
         // issue an error and recover by making this tag be anonymous.
         Diag(NameLoc, diag::err_redefinition_different_kind) << Name;
-        Diag(PrevDecl->getLocation(), diag::note_previous_definition);
+        notePreviousDefinition(PrevDecl->getLocation(), NameLoc);
         Name = nullptr;
         Invalid = true;
       }
@@ -13753,6 +13885,9 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
   // record.
   AddPushedVisibilityAttribute(New);
 
+  if (isMemberSpecialization && !New->isInvalidDecl())
+    CompleteMemberSpecialization(New, Previous);
+
   OwnedDecl = true;
   // In C++, don't return an invalid declaration. We can't recover well from
   // the cases where we make the type anonymous.
@@ -15221,7 +15356,7 @@ Decl *Sema::ActOnEnumConstant(Scope *S, Decl *theEnumDecl, Decl *lastEnumConst,
         Diag(IdLoc, diag::err_redefinition_of_enumerator) << Id;
       else
         Diag(IdLoc, diag::err_redefinition) << Id;
-      Diag(PrevDecl->getLocation(), diag::note_previous_definition);
+      notePreviousDefinition(PrevDecl->getLocation(), IdLoc);
       return nullptr;
     }
   }
diff --git a/lib/Sema/SemaDeclAttr.cpp b/lib/Sema/SemaDeclAttr.cpp
index 97d273f6ddb6..3de792e4e406 100644
--- a/lib/Sema/SemaDeclAttr.cpp
+++ b/lib/Sema/SemaDeclAttr.cpp
@@ -7230,6 +7230,13 @@ class DiagnoseUnguardedAvailability
         SemaRef.Context.getTargetInfo().getPlatformMinVersion());
   }
 
+  bool TraverseDecl(Decl *D) {
+    // Skip null decls and nested functions to prevent duplicate warnings.
+    if (!D || isa<FunctionDecl>(D))
+      return true;
+    return Base::TraverseDecl(D);
+  }
+
   bool TraverseStmt(Stmt *S) {
     if (!S)
       return true;
@@ -7243,6 +7250,8 @@ class DiagnoseUnguardedAvailability
 
   bool TraverseIfStmt(IfStmt *If);
 
+  bool TraverseLambdaExpr(LambdaExpr *E) { return true; } // Skip lambdas.
+
   bool VisitObjCMessageExpr(ObjCMessageExpr *Msg) {
     if (ObjCMethodDecl *D = Msg->getMethodDecl())
       DiagnoseDeclAvailability(
@@ -7346,7 +7355,9 @@ void DiagnoseUnguardedAvailability::DiagnoseDeclAvailability(
     llvm::raw_string_ostream FixItOS(FixItString);
     FixItOS << "if (" << (SemaRef.getLangOpts().ObjC1 ? "@available"
                                                       : "__builtin_available")
-            << "(" << SemaRef.getASTContext().getTargetInfo().getPlatformName()
+            << "("
+            << AvailabilityAttr::getPlatformNameSourceSpelling(
+                   SemaRef.getASTContext().getTargetInfo().getPlatformName())
             << " " << Introduced.getAsString() << ", *)) {\n"
             << Indentation << ExtraIndentation;
     FixitDiag << FixItHint::CreateInsertion(IfInsertionLoc, FixItOS.str());
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp
index 849e978e2d86..14efc9672061 100644
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -5277,6 +5277,9 @@ ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
 
     // We aren't supposed to apply this logic if there's an '&' involved.
     if (!find.HasFormOfMemberPointer) {
+      if (Expr::hasAnyTypeDependentArguments(ArgExprs))
+        return new (Context) CallExpr(
+            Context, Fn, ArgExprs, Context.DependentTy, VK_RValue, RParenLoc);
       OverloadExpr *ovl = find.Expression;
       if (UnresolvedLookupExpr *ULE = dyn_cast<UnresolvedLookupExpr>(ovl))
         return BuildOverloadedCallExpr(
@@ -8028,6 +8031,33 @@ QualType Sema::InvalidOperands(SourceLocation Loc, ExprResult &LHS,
   return QualType();
 }
 
+// Diagnose logical operators applied to vector operands (invoked for non-C++
+// code). If an operand is a scalar once implicit casts are stripped, report
+// its underlying type; otherwise report both vector types. Returns QualType().
+QualType Sema::InvalidLogicalVectorOperands(SourceLocation Loc, ExprResult &LHS,
+                                            ExprResult &RHS) {
+  QualType LHSType = LHS.get()->IgnoreImpCasts()->getType();
+  QualType RHSType = RHS.get()->IgnoreImpCasts()->getType();
+
+  bool LHSNatVec = LHSType->isVectorType();
+  bool RHSNatVec = RHSType->isVectorType();
+
+  if (!(LHSNatVec && RHSNatVec)) {
+    Expr *Vector = LHSNatVec ? LHS.get() : RHS.get();
+    Expr *NonVector = !LHSNatVec ? LHS.get() : RHS.get();
+    Diag(Loc, diag::err_typecheck_logical_vector_expr_gnu_cpp_restrict)
+        << 0 << Vector->getType() << NonVector->IgnoreImpCasts()->getType()
+        << Vector->getSourceRange();
+    return QualType();
+  }
+
+  Diag(Loc, diag::err_typecheck_logical_vector_expr_gnu_cpp_restrict)
+      << 1 << LHSType << RHSType << LHS.get()->getSourceRange()
+      << RHS.get()->getSourceRange();
+
+  return QualType();
+}
+
 /// Try to convert a value of non-vector type to a vector type by converting
 /// the type to the element type of the vector and then performing a splat.
 /// If the language is OpenCL, we only use conversions that promote scalar
@@ -8075,6 +8105,162 @@ static bool tryVectorConvertAndSplat(Sema &S, ExprResult *scalar,
   return false;
 }
 
+/// Returns true if the integer Int must be rejected, i.e. it cannot be
+/// converted to the integer type OtherIntTy without losing precision.
+static bool canConvertIntToOtherIntTy(Sema &S, ExprResult *Int,
+                                      QualType OtherIntTy) {
+  QualType IntTy = Int->get()->getType().getUnqualifiedType();
+
+  // Reject cases where the value of the Int is unknown as that would
+  // possibly cause truncation, but accept cases where the scalar can be
+  // demoted without loss of precision.
+  llvm::APSInt Result;
+  bool CstInt = Int->get()->EvaluateAsInt(Result, S.Context);
+  int Order = S.Context.getIntegerTypeOrder(OtherIntTy, IntTy);
+  bool IntSigned = IntTy->hasSignedIntegerRepresentation();
+  bool OtherIntSigned = OtherIntTy->hasSignedIntegerRepresentation();
+
+  if (CstInt) {
+    // If the scalar is constant and is of a higher order and has more active
+    // bits than the vector element type, reject it.
+    unsigned NumBits = IntSigned
+                           ? (Result.isNegative() ? Result.getMinSignedBits()
+                                                  : Result.getActiveBits())
+                           : Result.getActiveBits();
+    if (Order < 0 && S.Context.getIntWidth(OtherIntTy) < NumBits)
+      return true;
+
+    // If the signedness of the scalar type and the vector element type
+    // differs and the number of bits is greater than that of the vector
+    // element reject it.
+    return (IntSigned != OtherIntSigned &&
+            NumBits > S.Context.getIntWidth(OtherIntTy));
+  }
+
+  // Reject cases where the value of the scalar is not constant and its
+  // order is greater than that of the vector element type.
+  return (Order < 0);
+}
+
+/// Returns true if the integer Int must be rejected, i.e. it cannot be
+/// converted to the floating point type FloatTy without losing precision.
+static bool canConvertIntTyToFloatTy(Sema &S, ExprResult *Int,
+                                     QualType FloatTy) {
+  QualType IntTy = Int->get()->getType().getUnqualifiedType();
+
+  // Determine if the integer constant can be expressed as a floating point
+  // number of the appropriate type.
+  llvm::APSInt Result;
+  bool CstInt = Int->get()->EvaluateAsInt(Result, S.Context);
+  uint64_t Bits = 0;
+  if (CstInt) {
+    // Reject constants that would be truncated if they were converted to
+    // the floating point type. Test by simple to/from conversion.
+    // FIXME: Ideally the conversion to an APFloat and from an APFloat
+    //        could be avoided if there was a convertFromAPInt method
+    //        which could signal back if implicit truncation occurred.
+    llvm::APFloat Float(S.Context.getFloatTypeSemantics(FloatTy));
+    Float.convertFromAPInt(Result, IntTy->hasSignedIntegerRepresentation(),
+                           llvm::APFloat::rmTowardZero);
+    llvm::APSInt ConvertBack(S.Context.getIntWidth(IntTy),
+                             !IntTy->hasSignedIntegerRepresentation());
+    bool Ignored = false;
+    Float.convertToInteger(ConvertBack, llvm::APFloat::rmNearestTiesToEven,
+                           &Ignored);
+    if (Result != ConvertBack)
+      return true;
+  } else {
+    // Reject types that cannot be fully encoded into the mantissa of
+    // the float.
+    Bits = S.Context.getTypeSize(IntTy);
+    unsigned FloatPrec = llvm::APFloat::semanticsPrecision(
+        S.Context.getFloatTypeSemantics(FloatTy));
+    if (Bits > FloatPrec)
+      return true;
+  }
+
+  return false;
+}
+
+/// Attempt to convert and splat Scalar into a vector whose type matches
+/// Vector, following GCC conversion rules: the implicit conversion is allowed
+/// only when Scalar can be cast to Vector's element type without truncation.
+/// Returns false on success (Scalar is rewritten in place), true on rejection.
+static bool tryGCCVectorConvertAndSplat(Sema &S, ExprResult *Scalar,
+                                        ExprResult *Vector) {
+  QualType ScalarTy = Scalar->get()->getType().getUnqualifiedType();
+  QualType VectorTy = Vector->get()->getType().getUnqualifiedType();
+  const VectorType *VT = VectorTy->getAs<VectorType>();
+
+  assert(!isa<ExtVectorType>(VT) &&
+         "ExtVectorTypes should not be handled here!");
+
+  QualType VectorEltTy = VT->getElementType();
+
+  // Reject cases where the vector element type or the scalar element type are
+  // not integral or floating point types.
+  if (!VectorEltTy->isArithmeticType() || !ScalarTy->isArithmeticType())
+    return true;
+
+  // The conversion to apply to the scalar before splatting it,
+  // if necessary.
+  CastKind ScalarCast = CK_NoOp;
+
+  // Accept cases where the vector elements are integers and the scalar is
+  // an integer.
+  // FIXME: Notionally if the scalar was a floating point value with a precise
+  //        integral representation, we could cast it to an appropriate integer
+  //        type and then perform the rest of the checks here. GCC will perform
+  //        this conversion in some cases as determined by the input language.
+  //        We should accept it on a language independent basis.
+  if (VectorEltTy->isIntegralType(S.Context) &&
+      ScalarTy->isIntegralType(S.Context) &&
+      S.Context.getIntegerTypeOrder(VectorEltTy, ScalarTy)) {
+
+    if (canConvertIntToOtherIntTy(S, Scalar, VectorEltTy))
+      return true;
+
+    ScalarCast = CK_IntegralCast;
+  } else if (VectorEltTy->isRealFloatingType()) {
+    if (ScalarTy->isRealFloatingType()) {
+
+      // Reject cases where the scalar type is not a constant and has a higher
+      // Order than the vector element type.
+      llvm::APFloat Result(0.0);
+      bool CstScalar = Scalar->get()->EvaluateAsFloat(Result, S.Context);
+      int Order = S.Context.getFloatingTypeOrder(VectorEltTy, ScalarTy);
+      if (!CstScalar && Order < 0)
+        return true;
+
+      // If the scalar cannot be safely cast to the vector element type,
+      // reject it.
+      if (CstScalar) {
+        bool Truncated = false;
+        Result.convert(S.Context.getFloatTypeSemantics(VectorEltTy),
+                       llvm::APFloat::rmNearestTiesToEven, &Truncated);
+        if (Truncated)
+          return true;
+      }
+
+      ScalarCast = CK_FloatingCast;
+    } else if (ScalarTy->isIntegralType(S.Context)) {
+      if (canConvertIntTyToFloatTy(S, Scalar, VectorEltTy))
+        return true;
+
+      ScalarCast = CK_IntegralToFloating;
+    } else
+      return true;
+  }
+
+  // Apply the casts. NOTE(review): Scalar is dereferenced above already.
+  if (Scalar) {
+    if (ScalarCast != CK_NoOp)
+      *Scalar = S.ImpCastExprToType(Scalar->get(), VectorEltTy, ScalarCast);
+    *Scalar = S.ImpCastExprToType(Scalar->get(), VectorTy, CK_VectorSplat);
+  }
+  return false;
+}
+
 QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS,
                                    SourceLocation Loc, bool IsCompAssign,
                                    bool AllowBothBool,
@@ -8143,19 +8329,29 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS,
     }
   }
 
-  // If there's an ext-vector type and a scalar, try to convert the scalar to
+  // If there's a vector type and a scalar, try to convert the scalar to
   // the vector element type and splat.
-  // FIXME: this should also work for regular vector types as supported in GCC.
-  if (!RHSVecType && isa<ExtVectorType>(LHSVecType)) {
-    if (!tryVectorConvertAndSplat(*this, &RHS, RHSType,
-                                  LHSVecType->getElementType(), LHSType))
-      return LHSType;
+  if (!RHSVecType) {
+    if (isa<ExtVectorType>(LHSVecType)) {
+      if (!tryVectorConvertAndSplat(*this, &RHS, RHSType,
+                                    LHSVecType->getElementType(), LHSType))
+        return LHSType;
+    } else {
+      if (!tryGCCVectorConvertAndSplat(*this, &RHS, &LHS))
+        return LHSType;
+    }
   }
-  if (!LHSVecType && isa<ExtVectorType>(RHSVecType)) {
-    if (!tryVectorConvertAndSplat(*this, (IsCompAssign ? nullptr : &LHS),
-                                  LHSType, RHSVecType->getElementType(),
-                                  RHSType))
-      return RHSType;
+  if (!LHSVecType) {
+    if (isa<ExtVectorType>(RHSVecType)) {
+      if (!tryVectorConvertAndSplat(*this, (IsCompAssign ? nullptr : &LHS),
+                                    LHSType, RHSVecType->getElementType(),
+                                    RHSType))
+        return RHSType;
+    } else {
+      if (LHS.get()->getValueKind() == VK_LValue ||
+          !tryGCCVectorConvertAndSplat(*this, &LHS, &RHS))
+        return RHSType;
+    }
   }
 
   // FIXME: The code below also handles conversion between vectors and
@@ -8208,6 +8404,22 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS,
     return QualType();
   }
 
+
+  // If there is a vector type that is not an ExtVector and a scalar, we reach
+  // this point if scalar could not be converted to the vector's element type
+  // without truncation.
+  if ((RHSVecType && !isa<ExtVectorType>(RHSVecType)) ||
+      (LHSVecType && !isa<ExtVectorType>(LHSVecType))) {
+    QualType Scalar = LHSVecType ? RHSType : LHSType;
+    QualType Vector = LHSVecType ? LHSType : RHSType;
+    unsigned ScalarOrVector = LHSVecType && RHSVecType ? 1 : 0;
+    Diag(Loc,
+         diag::err_typecheck_vector_not_convertable_implict_truncation)
+        << ScalarOrVector << Scalar << Vector;
+
+    return QualType();
+  }
+
   // Otherwise, use the generic diagnostic.
   Diag(Loc, diag::err_typecheck_vector_not_convertable)
     << LHSType << RHSType
@@ -9827,6 +10039,12 @@ QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS,
   if (getLangOpts().OpenCL && getLangOpts().OpenCLVersion < 120 &&
       vType->hasFloatingRepresentation())
     return InvalidOperands(Loc, LHS, RHS);
+  // FIXME: The check for C++ here is for GCC compatibility. GCC rejects the
+  //        usage of the logical operators && and || with vectors in C. This
+  //        check could be notionally dropped.
+  if (!getLangOpts().CPlusPlus &&
+      !(isa<ExtVectorType>(vType->getAs<VectorType>())))
+    return InvalidLogicalVectorOperands(Loc, LHS, RHS);
 
   return GetSignedVectorType(LHS.get()->getType());
 }
@@ -11770,6 +11988,8 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc,
       resultType = GetSignedVectorType(resultType);
       break;
     } else {
+      // FIXME: GCC's vector extension permits the usage of '!' with a vector
+      //        type in C++. We should allow that here too.
       return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
         << resultType << Input.get()->getSourceRange());
     }
diff --git a/lib/Sema/SemaExprCXX.cpp b/lib/Sema/SemaExprCXX.cpp
index 9b88cddbc969..8500b748a3ec 100644
--- a/lib/Sema/SemaExprCXX.cpp
+++ b/lib/Sema/SemaExprCXX.cpp
@@ -4720,10 +4720,24 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, QualType LhsT,
     // regard to cv-qualifiers.
 
     const RecordType *lhsRecord = LhsT->getAs<RecordType>();
-    if (!lhsRecord) return false;
-
     const RecordType *rhsRecord = RhsT->getAs<RecordType>();
-    if (!rhsRecord) return false;
+    if (!rhsRecord || !lhsRecord) {
+      const ObjCObjectType *LHSObjTy = LhsT->getAs<ObjCObjectType>();
+      const ObjCObjectType *RHSObjTy = RhsT->getAs<ObjCObjectType>();
+      if (!LHSObjTy || !RHSObjTy)
+        return false;
+
+      ObjCInterfaceDecl *BaseInterface = LHSObjTy->getInterface();
+      ObjCInterfaceDecl *DerivedInterface = RHSObjTy->getInterface();
+      if (!BaseInterface || !DerivedInterface)
+        return false;
+
+      if (Self.RequireCompleteType(
+              KeyLoc, RhsT, diag::err_incomplete_type_used_in_type_trait_expr))
+        return false;
+
+      return BaseInterface->isSuperClassOf(DerivedInterface);
+    }
 
     assert(Self.Context.hasSameUnqualifiedType(LhsT, RhsT)
              == (lhsRecord == rhsRecord));
@@ -5342,6 +5356,15 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS,
 
   // C++11 [expr.cond]p1
   //   The first expression is contextually converted to bool.
+  //
+  // FIXME: GCC's vector extension permits the use of a?b:c where the type of
+  //        a is that of an integer vector with the same number of elements and
+  //        size as the vectors of b and c. If one of either b or c is a scalar
+  //        it is implicitly converted to match the type of the vector.
+  //        Otherwise the expression is ill-formed. If both b and c are scalars,
+  //        then b and c are checked and converted to the type of a if possible.
+  //        Unlike the OpenCL ?: operator, the expression is evaluated as
+  //        (a[0] != 0 ? b[0] : c[0], .. , a[n] != 0 ? b[n] : c[n]).
   if (!Cond.get()->isTypeDependent()) {
     ExprResult CondRes = CheckCXXBooleanCondition(Cond.get());
     if (CondRes.isInvalid())
diff --git a/lib/Sema/SemaExprObjC.cpp b/lib/Sema/SemaExprObjC.cpp
index a44e9243e3c5..28581bad1a7a 100644
--- a/lib/Sema/SemaExprObjC.cpp
+++ b/lib/Sema/SemaExprObjC.cpp
@@ -4241,8 +4241,7 @@ void Sema::diagnoseARCUnbridgedCast(Expr *e) {
     castType = cast->getTypeAsWritten();
     CCK = CCK_OtherCast;
   } else {
-    castType = cast->getType();
-    CCK = CCK_ImplicitConversion;
+    llvm_unreachable("Unexpected ImplicitCastExpr");
   }
 
   ARCConversionTypeClass castACTC =
diff --git a/lib/Sema/SemaInit.cpp b/lib/Sema/SemaInit.cpp
index d0f530010a0d..32024cb335dc 100644
--- a/lib/Sema/SemaInit.cpp
+++ b/lib/Sema/SemaInit.cpp
@@ -1209,7 +1209,7 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity,
 
   } else {
     assert((ElemType->isRecordType() || ElemType->isVectorType() ||
-            ElemType->isClkEventT()) && "Unexpected type");
+            ElemType->isOpenCLSpecificType()) && "Unexpected type");
 
     // C99 6.7.8p13:
     //
@@ -8296,8 +8296,46 @@ Sema::PerformCopyInitialization(const InitializedEntity &Entity,
                                                            AllowExplicit);
   InitializationSequence Seq(*this, Entity, Kind, InitE, TopLevelOfInitList);
 
+  // Prevent infinite recursion when performing parameter copy-initialization.
+  const bool ShouldTrackCopy =
+      Entity.isParameterKind() && Seq.isConstructorInitialization();
+  if (ShouldTrackCopy) {
+    if (llvm::find(CurrentParameterCopyTypes, Entity.getType()) !=
+        CurrentParameterCopyTypes.end()) {
+      Seq.SetOverloadFailure(
+          InitializationSequence::FK_ConstructorOverloadFailed,
+          OR_No_Viable_Function);
+
+      // Try to give a meaningful diagnostic note for the problematic
+      // constructor.
+      const auto LastStep = Seq.step_end() - 1;
+      assert(LastStep->Kind ==
+             InitializationSequence::SK_ConstructorInitialization);
+      const FunctionDecl *Function = LastStep->Function.Function;
+      auto Candidate =
+          llvm::find_if(Seq.getFailedCandidateSet(),
+                        [Function](const OverloadCandidate &Candidate) -> bool {
+                          return Candidate.Viable &&
+                                 Candidate.Function == Function &&
+                                 Candidate.Conversions.size() > 0;
+                        });
+      if (Candidate != Seq.getFailedCandidateSet().end() &&
+          Function->getNumParams() > 0) {
+        Candidate->Viable = false;
+        Candidate->FailureKind = ovl_fail_bad_conversion;
+        Candidate->Conversions[0].setBad(BadConversionSequence::no_conversion,
+                                         InitE,
+                                         Function->getParamDecl(0)->getType());
+      }
+    }
+    CurrentParameterCopyTypes.push_back(Entity.getType());
+  }
+
   ExprResult Result = Seq.Perform(*this, Entity, Kind, InitE);
 
+  if (ShouldTrackCopy)
+    CurrentParameterCopyTypes.pop_back();
+
   return Result;
 }
 
diff --git a/lib/Sema/SemaLookup.cpp b/lib/Sema/SemaLookup.cpp
index ce76e14982db..c5b579a4b2e9 100644
--- a/lib/Sema/SemaLookup.cpp
+++ b/lib/Sema/SemaLookup.cpp
@@ -1382,8 +1382,8 @@ Module *Sema::getOwningModule(Decl *Entity) {
   return M;
 }
 
-void Sema::makeMergedDefinitionVisible(NamedDecl *ND, SourceLocation Loc) {
-  if (auto *M = PP.getModuleContainingLocation(Loc))
+void Sema::makeMergedDefinitionVisible(NamedDecl *ND) {
+  if (auto *M = getCurrentModule())
     Context.mergeDefinitionIntoModule(ND, M);
   else
     // We're not building a module; just make the definition visible.
@@ -1393,7 +1393,7 @@ void Sema::makeMergedDefinitionVisible(NamedDecl *ND, SourceLocation Loc) {
   // visible too. They're not (necessarily) within a mergeable DeclContext.
   if (auto *TD = dyn_cast<TemplateDecl>(ND))
     for (auto *Param : *TD->getTemplateParameters())
-      makeMergedDefinitionVisible(Param, Loc);
+      makeMergedDefinitionVisible(Param);
 }
 
 /// \brief Find the module in which the given declaration was defined.
@@ -3445,7 +3445,8 @@ static void LookupVisibleDecls(DeclContext *Ctx, LookupResult &Result,
                                bool QualifiedNameLookup,
                                bool InBaseClass,
                                VisibleDeclConsumer &Consumer,
-                               VisibleDeclsRecord &Visited) {
+                               VisibleDeclsRecord &Visited,
+                               bool IncludeDependentBases = false) {
   if (!Ctx)
     return;
 
@@ -3501,7 +3502,8 @@ static void LookupVisibleDecls(DeclContext *Ctx, LookupResult &Result,
     ShadowContextRAII Shadow(Visited);
     for (auto I : Ctx->using_directives()) {
       LookupVisibleDecls(I->getNominatedNamespace(), Result,
-                         QualifiedNameLookup, InBaseClass, Consumer, Visited);
+                         QualifiedNameLookup, InBaseClass, Consumer, Visited,
+                         IncludeDependentBases);
     }
   }
 
@@ -3513,14 +3515,28 @@ static void LookupVisibleDecls(DeclContext *Ctx, LookupResult &Result,
     for (const auto &B : Record->bases()) {
       QualType BaseType = B.getType();
 
-      // Don't look into dependent bases, because name lookup can't look
-      // there anyway.
-      if (BaseType->isDependentType())
-        continue;
-
-      const RecordType *Record = BaseType->getAs<RecordType>();
-      if (!Record)
-        continue;
+      RecordDecl *RD;
+      if (BaseType->isDependentType()) {
+        if (!IncludeDependentBases) {
+          // Don't look into dependent bases, because name lookup can't look
+          // there anyway.
+          continue;
+        }
+        const auto *TST = BaseType->getAs<TemplateSpecializationType>();
+        if (!TST)
+          continue;
+        TemplateName TN = TST->getTemplateName();
+        const auto *TD =
+            dyn_cast_or_null<ClassTemplateDecl>(TN.getAsTemplateDecl());
+        if (!TD)
+          continue;
+        RD = TD->getTemplatedDecl();
+      } else {
+        const auto *Record = BaseType->getAs<RecordType>();
+        if (!Record)
+          continue;
+        RD = Record->getDecl();
+      }
 
       // FIXME: It would be nice to be able to determine whether referencing
       // a particular member would be ambiguous. For example, given
@@ -3543,8 +3559,8 @@ static void LookupVisibleDecls(DeclContext *Ctx, LookupResult &Result,
 
       // Find results in this base class (and its bases).
       ShadowContextRAII Shadow(Visited);
-      LookupVisibleDecls(Record->getDecl(), Result, QualifiedNameLookup,
-                         true, Consumer, Visited);
+      LookupVisibleDecls(RD, Result, QualifiedNameLookup, true, Consumer,
+                         Visited, IncludeDependentBases);
     }
   }
 
@@ -3713,7 +3729,8 @@ void Sema::LookupVisibleDecls(Scope *S, LookupNameKind Kind,
 
 void Sema::LookupVisibleDecls(DeclContext *Ctx, LookupNameKind Kind,
                               VisibleDeclConsumer &Consumer,
-                              bool IncludeGlobalScope) {
+                              bool IncludeGlobalScope,
+                              bool IncludeDependentBases) {
   LookupResult Result(*this, DeclarationName(), SourceLocation(), Kind);
   Result.setAllowHidden(Consumer.includeHiddenDecls());
   VisibleDeclsRecord Visited;
@@ -3721,7 +3738,8 @@ void Sema::LookupVisibleDecls(DeclContext *Ctx, LookupNameKind Kind,
     Visited.visitedContext(Context.getTranslationUnitDecl());
   ShadowContextRAII Shadow(Visited);
   ::LookupVisibleDecls(Ctx, Result, /*QualifiedNameLookup=*/true,
-                       /*InBaseClass=*/false, Consumer, Visited);
+                       /*InBaseClass=*/false, Consumer, Visited,
+                       IncludeDependentBases);
 }
 
 /// LookupOrCreateLabel - Do a name lookup of a label with the specified name.
diff --git a/lib/Sema/SemaOverload.cpp b/lib/Sema/SemaOverload.cpp
index 782c377e3202..51794160278c 100644
--- a/lib/Sema/SemaOverload.cpp
+++ b/lib/Sema/SemaOverload.cpp
@@ -11210,12 +11210,12 @@ Sema::resolveAddressOfOnlyViableOverloadCandidate(Expr *E,
 /// \brief Given an overloaded function, tries to turn it into a non-overloaded
 /// function reference using resolveAddressOfOnlyViableOverloadCandidate. This
 /// will perform access checks, diagnose the use of the resultant decl, and, if
-/// necessary, perform a function-to-pointer decay.
+/// requested, potentially perform a function-to-pointer decay.
 ///
 /// Returns false if resolveAddressOfOnlyViableOverloadCandidate fails.
 /// Otherwise, returns true. This may emit diagnostics and return true.
 bool Sema::resolveAndFixAddressOfOnlyViableOverloadCandidate(
-    ExprResult &SrcExpr) {
+    ExprResult &SrcExpr, bool DoFunctionPointerConverion) {
   Expr *E = SrcExpr.get();
   assert(E->getType() == Context.OverloadTy && "SrcExpr must be an overload");
 
@@ -11230,7 +11230,7 @@ bool Sema::resolveAndFixAddressOfOnlyViableOverloadCandidate(
   DiagnoseUseOfDecl(Found, E->getExprLoc());
   CheckAddressOfMemberAccess(E, DAP);
   Expr *Fixed = FixOverloadedFunctionReference(E, DAP, Found);
-  if (Fixed->getType()->isFunctionType())
+  if (DoFunctionPointerConverion && Fixed->getType()->isFunctionType())
     SrcExpr = DefaultFunctionArrayConversion(Fixed, /*Diagnose=*/false);
   else
     SrcExpr = Fixed;
diff --git a/lib/Sema/SemaStmt.cpp b/lib/Sema/SemaStmt.cpp
index 5d7eada28717..33a8f9c4afa3 100644
--- a/lib/Sema/SemaStmt.cpp
+++ b/lib/Sema/SemaStmt.cpp
@@ -2268,9 +2268,57 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc,
         BoundExpr = IntegerLiteral::Create(
             Context, CAT->getSize(), Context.getPointerDiffType(), RangeLoc);
       else if (const VariableArrayType *VAT =
-               dyn_cast<VariableArrayType>(UnqAT))
-        BoundExpr = VAT->getSizeExpr();
-      else {
+               dyn_cast<VariableArrayType>(UnqAT)) {
+        // For a variably modified type we can't just use the expression within
+        // the array bounds, since we don't want that to be re-evaluated here.
+        // Rather, we need to determine what it was when the array was first
+        // created - so we resort to using sizeof(vla)/sizeof(element).
+        // For example:
+        //  void f(int b) { 
+        //    int vla[b];
+        //    b = -1;   <-- This should not affect the num of iterations below
+        //    for (int &c : vla) { .. }
+        //  }
+
+        // FIXME: This results in codegen generating IR that recalculates the
+        // run-time number of elements (as opposed to just using the IR Value
+        // that corresponds to the run-time value of each bound that was
+        // generated when the array was created.) If this proves too embarrassing
+        // even for unoptimized IR, consider passing a magic-value/cookie to
+        // codegen that then knows to simply use that initial llvm::Value (that
+        // corresponds to the bound at time of array creation) within
+        // getelementptr.  But be prepared to pay the price of increasing a
+        // customized form of coupling between the two components - which  could
+        // be hard to maintain as the codebase evolves.
+
+        ExprResult SizeOfVLAExprR = ActOnUnaryExprOrTypeTraitExpr(
+            EndVar->getLocation(), UETT_SizeOf,
+            /*isType=*/true,
+            CreateParsedType(VAT->desugar(), Context.getTrivialTypeSourceInfo(
+                                                 VAT->desugar(), RangeLoc))
+                .getAsOpaquePtr(),
+            EndVar->getSourceRange());
+        if (SizeOfVLAExprR.isInvalid())
+          return StmtError();
+        
+        ExprResult SizeOfEachElementExprR = ActOnUnaryExprOrTypeTraitExpr(
+            EndVar->getLocation(), UETT_SizeOf,
+            /*isType=*/true,
+            CreateParsedType(VAT->desugar(),
+                             Context.getTrivialTypeSourceInfo(
+                                 VAT->getElementType(), RangeLoc))
+                .getAsOpaquePtr(),
+            EndVar->getSourceRange());
+        if (SizeOfEachElementExprR.isInvalid())
+          return StmtError();
+
+        BoundExpr =
+            ActOnBinOp(S, EndVar->getLocation(), tok::slash,
+                       SizeOfVLAExprR.get(), SizeOfEachElementExprR.get());
+        if (BoundExpr.isInvalid())
+          return StmtError();
+        
+      } else {
         // Can't be a DependentSizedArrayType or an IncompleteArrayType since
         // UnqAT is not incomplete and Range is not type-dependent.
         llvm_unreachable("Unexpected array type in for-range");
diff --git a/lib/Sema/SemaTemplate.cpp b/lib/Sema/SemaTemplate.cpp
index 61b4df40964c..a479d1027533 100644
--- a/lib/Sema/SemaTemplate.cpp
+++ b/lib/Sema/SemaTemplate.cpp
@@ -455,6 +455,99 @@ void Sema::LookupTemplateName(LookupResult &Found,
   }
 }
 
+/// Diagnose an expression that was used as if it named a template.
+///
+/// Typo correction, restricted to template names and the C++ named-cast
+/// keywords, is tried first; an acceptable candidate produces a "suggest"
+/// diagnostic. Otherwise a plain error highlighting the range from \p Less to
+/// \p Greater is emitted. Either way, a note is attached to the declaration
+/// the expression actually refers to, if there is one.
+///
+/// \param S Scope handed to typo correction.
+/// \param TemplateName The offending expression; must be a MemberExpr or a
+/// DeclRefExpr unless it is invalid, in which case nothing is diagnosed.
+/// \param Less Start of the range highlighted by the fallback error.
+/// \param Greater End of the range highlighted by the fallback error.
+void Sema::diagnoseExprIntendedAsTemplateName(Scope *S, ExprResult TemplateName,
+                                              SourceLocation Less,
+                                              SourceLocation Greater) {
+  if (TemplateName.isInvalid())
+    return;
+
+  DeclarationNameInfo NameInfo;
+  CXXScopeSpec SS;
+  LookupNameKind LookupKind;
+
+  DeclContext *LookupCtx = nullptr;
+  NamedDecl *Found = nullptr;
+
+  // Figure out what name we looked up.
+  if (auto *ME = dyn_cast<MemberExpr>(TemplateName.get())) {
+    NameInfo = ME->getMemberNameInfo();
+    SS.Adopt(ME->getQualifierLoc());
+    LookupKind = LookupMemberName;
+    LookupCtx = ME->getBase()->getType()->getAsCXXRecordDecl();
+    Found = ME->getMemberDecl();
+  } else {
+    auto *DRE = cast<DeclRefExpr>(TemplateName.get());
+    NameInfo = DRE->getNameInfo();
+    SS.Adopt(DRE->getQualifierLoc());
+    LookupKind = LookupOrdinaryName;
+    Found = DRE->getFoundDecl();
+  }
+
+  // Try to correct the name by looking for templates and C++ named casts.
+  struct TemplateCandidateFilter : CorrectionCandidateCallback {
+    TemplateCandidateFilter() {
+      WantTypeSpecifiers = false;
+      WantExpressionKeywords = false;
+      WantRemainingKeywords = false;
+      WantCXXNamedCasts = true;
+    }
+    bool ValidateCandidate(const TypoCorrection &Candidate) override {
+      if (auto *ND = Candidate.getCorrectionDecl())
+        return isAcceptableTemplateName(ND->getASTContext(), ND, true);
+      return Candidate.isKeyword();
+    }
+  };
+
+  DeclarationName Name = NameInfo.getName();
+  if (TypoCorrection Corrected =
+          CorrectTypo(NameInfo, LookupKind, S, &SS,
+                      llvm::make_unique<TemplateCandidateFilter>(),
+                      CTK_ErrorRecovery, LookupCtx)) {
+    // Keep the correction only if it names a template or is a keyword.
+    auto *ND = Corrected.getFoundDecl();
+    if (ND)
+      ND = isAcceptableTemplateName(Context, ND,
+                                    /*AllowFunctionTemplates*/ true);
+    if (ND || Corrected.isKeyword()) {
+      if (LookupCtx) {
+        std::string CorrectedStr(Corrected.getAsString(getLangOpts()));
+        bool DroppedSpecifier = Corrected.WillReplaceSpecifier() &&
+                                Name.getAsString() == CorrectedStr;
+        diagnoseTypo(Corrected,
+                     PDiag(diag::err_non_template_in_member_template_id_suggest)
+                         << Name << LookupCtx << DroppedSpecifier
+                         << SS.getRange(), false);
+      } else {
+        diagnoseTypo(Corrected,
+                     PDiag(diag::err_non_template_in_template_id_suggest)
+                         << Name, false);
+      }
+      if (Found)
+        Diag(Found->getLocation(),
+             diag::note_non_template_in_template_id_found);
+      return;
+    }
+  }
+
+  Diag(NameInfo.getLoc(), diag::err_non_template_in_template_id)
+    << Name << SourceRange(Less, Greater);
+  if (Found)
+    Diag(Found->getLocation(), diag::note_non_template_in_template_id_found);
+}
+
 /// ActOnDependentIdExpression - Handle a dependent id-expression that
 /// was just parsed.  This is only possible with an explicit scope
 /// specifier naming a dependent type.
@@ -1251,8 +1330,8 @@ Sema::CheckClassTemplate(Scope *S, unsigned TagSpec, TagUseKind TUK,
           auto *Tmpl = cast<CXXRecordDecl>(Hidden)->getDescribedClassTemplate();
           assert(Tmpl && "original definition of a class template is not a "
                          "class template?");
-          makeMergedDefinitionVisible(Hidden, KWLoc);
-          makeMergedDefinitionVisible(Tmpl, KWLoc);
+          makeMergedDefinitionVisible(Hidden);
+          makeMergedDefinitionVisible(Tmpl);
           return Def;
         }
 
@@ -7352,7 +7431,7 @@ Sema::ActOnClassTemplateSpecialization(Scope *S, unsigned TagSpec,
     NamedDecl *Hidden = nullptr;
     if (Def && SkipBody && !hasVisibleDefinition(Def, &Hidden)) {
       SkipBody->ShouldSkip = true;
-      makeMergedDefinitionVisible(Hidden, KWLoc);
+      makeMergedDefinitionVisible(Hidden);
       // From here on out, treat this as just a redeclaration.
       TUK = TUK_Declaration;
     } else if (Def) {
@@ -7825,6 +7904,9 @@ bool Sema::CheckFunctionTemplateSpecialization(
     // C++11 [dcl.constexpr]p1: An explicit specialization of a constexpr
     // function can differ from the template declaration with respect to
     // the constexpr specifier.
+    // FIXME: We need an update record for this AST mutation.
+    // FIXME: What if there are multiple such prior declarations (for instance,
+    // from different modules)?
     Specialization->setConstexpr(FD->isConstexpr());
   }
 
@@ -7872,9 +7954,11 @@ bool Sema::CheckFunctionTemplateSpecialization(
     // flag to not-deleted, so that we can inherit that information from 'FD'.
     if (Specialization->isDeleted() && !SpecInfo->isExplicitSpecialization() &&
         !Specialization->getCanonicalDecl()->isReferenced()) {
+      // FIXME: This assert will not hold in the presence of modules.
       assert(
           Specialization->getCanonicalDecl() == Specialization &&
           "This must be the only existing declaration of this specialization");
+      // FIXME: We need an update record for this AST mutation.
       Specialization->setDeletedAsWritten(false);
     }
     SpecInfo->setTemplateSpecializationKind(TSK_ExplicitSpecialization);
@@ -7987,8 +8071,11 @@ Sema::CheckMemberSpecialization(NamedDecl *Member, LookupResult &Previous) {
     return false;
   }
 
-  // If this is a friend, just bail out here before we start turning
-  // things into explicit specializations.
+  // A member specialization in a friend declaration isn't really declaring
+  // an explicit specialization, just identifying a specific (possibly implicit)
+  // specialization. Don't change the template specialization kind.
+  //
+  // FIXME: Is this really valid? Other compilers reject.
   if (Member->getFriendObjectKind() != Decl::FOK_None) {
     // Preserve instantiation information.
     if (InstantiatedFrom && isa<CXXMethodDecl>(Member)) {
@@ -8038,66 +8125,36 @@ Sema::CheckMemberSpecialization(NamedDecl *Member, LookupResult &Previous) {
                                        false))
     return true;
 
-  // Note that this is an explicit instantiation of a member.
-  // the original declaration to note that it is an explicit specialization
-  // (if it was previously an implicit instantiation). This latter step
-  // makes bookkeeping easier.
-  if (isa<FunctionDecl>(Member)) {
+  // Note that this member specialization is an "instantiation of" the
+  // corresponding member of the original template.
+  if (auto *MemberFunction = dyn_cast<FunctionDecl>(Member)) {
     FunctionDecl *InstantiationFunction = cast<FunctionDecl>(Instantiation);
     if (InstantiationFunction->getTemplateSpecializationKind() ==
           TSK_ImplicitInstantiation) {
-      InstantiationFunction->setTemplateSpecializationKind(
-                                                  TSK_ExplicitSpecialization);
-      InstantiationFunction->setLocation(Member->getLocation());
       // Explicit specializations of member functions of class templates do not
       // inherit '=delete' from the member function they are specializing.
       if (InstantiationFunction->isDeleted()) {
+        // FIXME: This assert will not hold in the presence of modules.
         assert(InstantiationFunction->getCanonicalDecl() ==
                InstantiationFunction);
+        // FIXME: We need an update record for this AST mutation.
         InstantiationFunction->setDeletedAsWritten(false);
       }
     }
 
-    cast<FunctionDecl>(Member)->setInstantiationOfMemberFunction(
-                                        cast<CXXMethodDecl>(InstantiatedFrom),
-                                                  TSK_ExplicitSpecialization);
-    MarkUnusedFileScopedDecl(InstantiationFunction);
-  } else if (isa<VarDecl>(Member)) {
-    VarDecl *InstantiationVar = cast<VarDecl>(Instantiation);
-    if (InstantiationVar->getTemplateSpecializationKind() ==
-          TSK_ImplicitInstantiation) {
-      InstantiationVar->setTemplateSpecializationKind(
-                                                  TSK_ExplicitSpecialization);
-      InstantiationVar->setLocation(Member->getLocation());
-    }
-
-    cast<VarDecl>(Member)->setInstantiationOfStaticDataMember(
+    MemberFunction->setInstantiationOfMemberFunction(
+        cast<CXXMethodDecl>(InstantiatedFrom), TSK_ExplicitSpecialization);
+  } else if (auto *MemberVar = dyn_cast<VarDecl>(Member)) {
+    MemberVar->setInstantiationOfStaticDataMember(
         cast<VarDecl>(InstantiatedFrom), TSK_ExplicitSpecialization);
-    MarkUnusedFileScopedDecl(InstantiationVar);
-  } else if (isa<CXXRecordDecl>(Member)) {
-    CXXRecordDecl *InstantiationClass = cast<CXXRecordDecl>(Instantiation);
-    if (InstantiationClass->getTemplateSpecializationKind() ==
-          TSK_ImplicitInstantiation) {
-      InstantiationClass->setTemplateSpecializationKind(
-                                                   TSK_ExplicitSpecialization);
-      InstantiationClass->setLocation(Member->getLocation());
-    }
-
-    cast<CXXRecordDecl>(Member)->setInstantiationOfMemberClass(
-                                        cast<CXXRecordDecl>(InstantiatedFrom),
-                                                   TSK_ExplicitSpecialization);
-  } else {
-    assert(isa<EnumDecl>(Member) && "Only member enums remain");
-    EnumDecl *InstantiationEnum = cast<EnumDecl>(Instantiation);
-    if (InstantiationEnum->getTemplateSpecializationKind() ==
-          TSK_ImplicitInstantiation) {
-      InstantiationEnum->setTemplateSpecializationKind(
-                                                   TSK_ExplicitSpecialization);
-      InstantiationEnum->setLocation(Member->getLocation());
-    }
-
-    cast<EnumDecl>(Member)->setInstantiationOfMemberEnum(
+  } else if (auto *MemberClass = dyn_cast<CXXRecordDecl>(Member)) {
+    MemberClass->setInstantiationOfMemberClass(
+        cast<CXXRecordDecl>(InstantiatedFrom), TSK_ExplicitSpecialization);
+  } else if (auto *MemberEnum = dyn_cast<EnumDecl>(Member)) {
+    MemberEnum->setInstantiationOfMemberEnum(
         cast<EnumDecl>(InstantiatedFrom), TSK_ExplicitSpecialization);
+  } else {
+    llvm_unreachable("unknown member specialization kind");
   }
 
   // Save the caller the trouble of having to figure out which declaration
@@ -8107,6 +8164,43 @@ Sema::CheckMemberSpecialization(NamedDecl *Member, LookupResult &Previous) {
   return false;
 }
 
+/// Re-label a member that was implicitly instantiated from a class template as
+/// an explicit specialization, and give it that specialization's location.
+///
+/// \param OrigD The member declaration instantiated from the template.
+/// \param Loc The location of the explicit specialization of the member.
+template<typename DeclT>
+static void completeMemberSpecializationImpl(Sema &S, DeclT *OrigD,
+                                             SourceLocation Loc) {
+  if (OrigD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) {
+    // FIXME: Inform AST mutation listeners of this AST mutation.
+    // FIXME: If there are multiple in-class declarations of the member (from
+    // multiple modules, or a declaration and later definition of a member
+    // type), should we update all of them?
+    OrigD->setTemplateSpecializationKind(TSK_ExplicitSpecialization);
+    OrigD->setLocation(Loc);
+  }
+}
+
+/// Propagate an explicit member specialization to its canonical declaration.
+void Sema::CompleteMemberSpecialization(NamedDecl *Member,
+                                        LookupResult &Previous) {
+  auto *Canonical = cast<NamedDecl>(Member->getCanonicalDecl());
+  if (Canonical == Member)
+    return;
+  const SourceLocation SpecLoc = Member->getLocation();
+  if (auto *MD = dyn_cast<CXXMethodDecl>(Canonical))
+    completeMemberSpecializationImpl(*this, MD, SpecLoc);
+  else if (auto *VD = dyn_cast<VarDecl>(Canonical))
+    completeMemberSpecializationImpl(*this, VD, SpecLoc);
+  else if (auto *RD = dyn_cast<CXXRecordDecl>(Canonical))
+    completeMemberSpecializationImpl(*this, RD, SpecLoc);
+  else if (auto *ED = dyn_cast<EnumDecl>(Canonical))
+    completeMemberSpecializationImpl(*this, ED, SpecLoc);
+  else
+    llvm_unreachable("unknown member specialization kind");
+}
+
 /// \brief Check the scope of an explicit instantiation.
 ///
 /// \returns true if a serious error occurs, false otherwise.
diff --git a/lib/Sema/SemaTemplateInstantiateDecl.cpp b/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 9a71a17561c7..03df6fde6c80 100644
--- a/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1849,6 +1849,19 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(FunctionDecl *D,
         }
       }
     }
+
+    // Check the template parameter list against the previous declaration. The
+    // goal here is to pick up default arguments added since the friend was
+    // declared; we know the template parameter lists match, since otherwise
+    // we would not have picked this template as the previous declaration.
+    if (TemplateParams && FunctionTemplate->getPreviousDecl()) {
+      SemaRef.CheckTemplateParameterList(
+          TemplateParams,
+          FunctionTemplate->getPreviousDecl()->getTemplateParameters(),
+          Function->isThisDeclarationADefinition()
+              ? Sema::TPC_FriendFunctionTemplateDefinition
+              : Sema::TPC_FriendFunctionTemplate);
+    }
   }
 
   if (Function->isLocalExternDecl() && !Function->getPreviousDecl())
@@ -3660,6 +3673,7 @@ TemplateDeclInstantiator::InitFunctionInstantiation(FunctionDecl *New,
       New->setType(SemaRef.Context.getFunctionType(
           NewProto->getReturnType(), NewProto->getParamTypes(), EPI));
     } else {
+      Sema::ContextRAII SwitchContext(SemaRef, New);
       SemaRef.SubstExceptionSpec(New, Proto, TemplateArgs);
     }
   }
diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp
index bcc66bbd1c0a..3992179fabae 100644
--- a/lib/Sema/SemaType.cpp
+++ b/lib/Sema/SemaType.cpp
@@ -2285,8 +2285,9 @@ bool Sema::CheckFunctionReturnType(QualType T, SourceLocation Loc) {
   // Methods cannot return interface types. All ObjC objects are
   // passed by reference.
   if (T->isObjCObjectType()) {
-    Diag(Loc, diag::err_object_cannot_be_passed_returned_by_value) << 0 << T;
-    return 0;
+    Diag(Loc, diag::err_object_cannot_be_passed_returned_by_value)
+        << 0 << T << FixItHint::CreateInsertion(Loc, "*");
+    return true;
   }
 
   return false;
diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp
index 61b5a822c552..ef8481488302 100644
--- a/lib/Serialization/ASTReader.cpp
+++ b/lib/Serialization/ASTReader.cpp
@@ -1534,9 +1534,8 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) {
         return Macro;
 
       unsigned NextIndex = 1; // Skip identifier ID.
-      SubmoduleID SubModID = getGlobalSubmoduleID(F, Record[NextIndex++]);
       SourceLocation Loc = ReadSourceLocation(F, Record, NextIndex);
-      MacroInfo *MI = PP.AllocateDeserializedMacroInfo(Loc, SubModID);
+      MacroInfo *MI = PP.AllocateMacroInfo(Loc);
       MI->setDefinitionEndLoc(ReadSourceLocation(F, Record, NextIndex));
       MI->setIsUsed(Record[NextIndex++]);
       MI->setUsedForHeaderGuard(Record[NextIndex++]);
diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp
index 8e4b217a44cd..b6c0cb2815fb 100644
--- a/lib/Serialization/ASTWriter.cpp
+++ b/lib/Serialization/ASTWriter.cpp
@@ -2413,7 +2413,6 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
     }
 
     AddIdentifierRef(Name, Record);
-    Record.push_back(inferSubmoduleIDFromLocation(MI->getDefinitionLoc()));
     AddSourceLocation(MI->getDefinitionLoc(), Record);
     AddSourceLocation(MI->getDefinitionEndLoc(), Record);
     Record.push_back(MI->isUsed());
diff --git a/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp b/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
index 1ea85d60c9e9..371187747f03 100644
--- a/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
+++ b/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
@@ -233,19 +233,16 @@ void NilArgChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
     if (StringSelectors.empty()) {
       ASTContext &Ctx = C.getASTContext();
       Selector Sels[] = {
-        getKeywordSelector(Ctx, "caseInsensitiveCompare", nullptr),
-        getKeywordSelector(Ctx, "compare", nullptr),
-        getKeywordSelector(Ctx, "compare", "options", nullptr),
-        getKeywordSelector(Ctx, "compare", "options", "range", nullptr),
-        getKeywordSelector(Ctx, "compare", "options", "range", "locale",
-                           nullptr),
-        getKeywordSelector(Ctx, "componentsSeparatedByCharactersInSet",
-                           nullptr),
-        getKeywordSelector(Ctx, "initWithFormat",
-                           nullptr),
-        getKeywordSelector(Ctx, "localizedCaseInsensitiveCompare", nullptr),
-        getKeywordSelector(Ctx, "localizedCompare", nullptr),
-        getKeywordSelector(Ctx, "localizedStandardCompare", nullptr),
+          getKeywordSelector(Ctx, "caseInsensitiveCompare"),
+          getKeywordSelector(Ctx, "compare"),
+          getKeywordSelector(Ctx, "compare", "options"),
+          getKeywordSelector(Ctx, "compare", "options", "range"),
+          getKeywordSelector(Ctx, "compare", "options", "range", "locale"),
+          getKeywordSelector(Ctx, "componentsSeparatedByCharactersInSet"),
+          getKeywordSelector(Ctx, "initWithFormat"),
+          getKeywordSelector(Ctx, "localizedCaseInsensitiveCompare"),
+          getKeywordSelector(Ctx, "localizedCompare"),
+          getKeywordSelector(Ctx, "localizedStandardCompare"),
       };
       for (Selector KnownSel : Sels)
         StringSelectors[KnownSel] = 0;
@@ -262,16 +259,15 @@ void NilArgChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
 
     if (ArrayWithObjectSel.isNull()) {
       ASTContext &Ctx = C.getASTContext();
-      ArrayWithObjectSel = getKeywordSelector(Ctx, "arrayWithObject", nullptr);
-      AddObjectSel = getKeywordSelector(Ctx, "addObject", nullptr);
+      ArrayWithObjectSel = getKeywordSelector(Ctx, "arrayWithObject");
+      AddObjectSel = getKeywordSelector(Ctx, "addObject");
       InsertObjectAtIndexSel =
-        getKeywordSelector(Ctx, "insertObject", "atIndex", nullptr);
+          getKeywordSelector(Ctx, "insertObject", "atIndex");
       ReplaceObjectAtIndexWithObjectSel =
-        getKeywordSelector(Ctx, "replaceObjectAtIndex", "withObject", nullptr);
+          getKeywordSelector(Ctx, "replaceObjectAtIndex", "withObject");
       SetObjectAtIndexedSubscriptSel =
-        getKeywordSelector(Ctx, "setObject", "atIndexedSubscript", nullptr);
-      ArrayByAddingObjectSel =
-        getKeywordSelector(Ctx, "arrayByAddingObject", nullptr);
+          getKeywordSelector(Ctx, "setObject", "atIndexedSubscript");
+      ArrayByAddingObjectSel = getKeywordSelector(Ctx, "arrayByAddingObject");
     }
 
     if (S == ArrayWithObjectSel || S == AddObjectSel ||
@@ -292,13 +288,11 @@ void NilArgChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
     if (DictionaryWithObjectForKeySel.isNull()) {
       ASTContext &Ctx = C.getASTContext();
       DictionaryWithObjectForKeySel =
-        getKeywordSelector(Ctx, "dictionaryWithObject", "forKey", nullptr);
-      SetObjectForKeySel =
-        getKeywordSelector(Ctx, "setObject", "forKey", nullptr);
+          getKeywordSelector(Ctx, "dictionaryWithObject", "forKey");
+      SetObjectForKeySel = getKeywordSelector(Ctx, "setObject", "forKey");
       SetObjectForKeyedSubscriptSel =
-        getKeywordSelector(Ctx, "setObject", "forKeyedSubscript", nullptr);
-      RemoveObjectForKeySel =
-        getKeywordSelector(Ctx, "removeObjectForKey", nullptr);
+          getKeywordSelector(Ctx, "setObject", "forKeyedSubscript");
+      RemoveObjectForKeySel = getKeywordSelector(Ctx, "removeObjectForKey");
     }
 
     if (S == DictionaryWithObjectForKeySel || S == SetObjectForKeySel) {
diff --git a/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
index 8c2aef21b3ca..48d6cd8a527c 100644
--- a/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
@@ -41,6 +41,22 @@ bool BuiltinFunctionChecker::evalCall(const CallExpr *CE,
   default:
     return false;
 
+  case Builtin::BI__builtin_assume: {
+    assert (CE->arg_begin() != CE->arg_end());
+    SVal ArgSVal = state->getSVal(CE->getArg(0), LCtx);
+    if (ArgSVal.isUndef())
+      return true; // Return true to model purity.
+
+    state = state->assume(ArgSVal.castAs<DefinedOrUnknownSVal>(), true);
+    // FIXME: do we want to warn here? Not right now. The most reports might
+    // come from infeasible paths, thus being false positives.
+    if (!state)
+      return true;
+
+    C.addTransition(state);
+    return true;
+  }
+
   case Builtin::BI__builtin_unpredictable:
   case Builtin::BI__builtin_expect:
   case Builtin::BI__builtin_assume_aligned:
diff --git a/lib/StaticAnalyzer/Checkers/NoReturnFunctionChecker.cpp b/lib/StaticAnalyzer/Checkers/NoReturnFunctionChecker.cpp
index c1deadef4202..8a5c769b6b50 100644
--- a/lib/StaticAnalyzer/Checkers/NoReturnFunctionChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/NoReturnFunctionChecker.cpp
@@ -123,14 +123,14 @@ void NoReturnFunctionChecker::checkPostObjCMessage(const ObjCMethodCall &Msg,
   case 4:
     lazyInitKeywordSelector(HandleFailureInFunctionSel, C.getASTContext(),
                             "handleFailureInFunction", "file", "lineNumber",
-                            "description", nullptr);
+                            "description");
     if (Sel != HandleFailureInFunctionSel)
       return;
     break;
   case 5:
     lazyInitKeywordSelector(HandleFailureInMethodSel, C.getASTContext(),
                             "handleFailureInMethod", "object", "file",
-                            "lineNumber", "description", nullptr);
+                            "lineNumber", "description");
     if (Sel != HandleFailureInMethodSel)
       return;
     break;
diff --git a/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp b/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
index 3f6ae6222ce0..89b1291c4f46 100644
--- a/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
@@ -703,31 +703,30 @@ class RetainSummaryManager {
     ObjCMethodSummaries[ObjCSummaryKey(ClsII, S)]  = Summ;
   }
 
+  template <typename... Keywords> // Kws: keywords naming the selector
   void addMethodSummary(IdentifierInfo *ClsII, ObjCMethodSummariesTy &Summaries,
-                        const RetainSummary *Summ, va_list argp) {
-    Selector S = getKeywordSelector(Ctx, argp);
+                        const RetainSummary *Summ, Keywords *... Kws) {
+    Selector S = getKeywordSelector(Ctx, Kws...);
     Summaries[ObjCSummaryKey(ClsII, S)] = Summ;
   }
 
-  void addInstMethSummary(const char* Cls, const RetainSummary * Summ, ...) {
-    va_list argp;
-    va_start(argp, Summ);
-    addMethodSummary(&Ctx.Idents.get(Cls), ObjCMethodSummaries, Summ, argp);
-    va_end(argp);
+  template <typename... Keywords> // records Summ in ObjCMethodSummaries
+  void addInstMethSummary(const char *Cls, const RetainSummary *Summ,
+                          Keywords *... Kws) {
+    addMethodSummary(&Ctx.Idents.get(Cls), ObjCMethodSummaries, Summ, Kws...);
   }
 
-  void addClsMethSummary(const char* Cls, const RetainSummary * Summ, ...) {
-    va_list argp;
-    va_start(argp, Summ);
-    addMethodSummary(&Ctx.Idents.get(Cls),ObjCClassMethodSummaries, Summ, argp);
-    va_end(argp);
+  template <typename... Keywords> // records Summ in ObjCClassMethodSummaries
+  void addClsMethSummary(const char *Cls, const RetainSummary *Summ,
+                         Keywords *... Kws) {
+    addMethodSummary(&Ctx.Idents.get(Cls), ObjCClassMethodSummaries, Summ,
+                     Kws...);
   }
 
-  void addClsMethSummary(IdentifierInfo *II, const RetainSummary * Summ, ...) {
-    va_list argp;
-    va_start(argp, Summ);
-    addMethodSummary(II, ObjCClassMethodSummaries, Summ, argp);
-    va_end(argp);
+  template <typename... Keywords> // class given as IdentifierInfo, not by name
+  void addClsMethSummary(IdentifierInfo *II, const RetainSummary *Summ,
+                         Keywords *... Kws) {
+    addMethodSummary(II, ObjCClassMethodSummaries, Summ, Kws...);
   }
 
 public:
@@ -1640,20 +1639,16 @@ void RetainSummaryManager::InitializeMethodSummaries() {
   addClassMethSummary("NSAutoreleasePool", "new", NoTrackYet);
 
   // Create summaries QCRenderer/QCView -createSnapShotImageOfType:
-  addInstMethSummary("QCRenderer", AllocSumm,
-                     "createSnapshotImageOfType", nullptr);
-  addInstMethSummary("QCView", AllocSumm,
-                     "createSnapshotImageOfType", nullptr);
+  addInstMethSummary("QCRenderer", AllocSumm, "createSnapshotImageOfType");
+  addInstMethSummary("QCView", AllocSumm, "createSnapshotImageOfType");
 
   // Create summaries for CIContext, 'createCGImage' and
   // 'createCGLayerWithSize'.  These objects are CF objects, and are not
   // automatically garbage collected.
-  addInstMethSummary("CIContext", CFAllocSumm,
-                     "createCGImage", "fromRect", nullptr);
+  addInstMethSummary("CIContext", CFAllocSumm, "createCGImage", "fromRect");
   addInstMethSummary("CIContext", CFAllocSumm, "createCGImage", "fromRect",
-                     "format", "colorSpace", nullptr);
-  addInstMethSummary("CIContext", CFAllocSumm, "createCGLayerWithSize", "info",
-                     nullptr);
+                     "format", "colorSpace");
+  addInstMethSummary("CIContext", CFAllocSumm, "createCGLayerWithSize", "info");
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/StaticAnalyzer/Checkers/SelectorExtras.h b/lib/StaticAnalyzer/Checkers/SelectorExtras.h
index 41f70d7d5b69..b11d070c629b 100644
--- a/lib/StaticAnalyzer/Checkers/SelectorExtras.h
+++ b/lib/StaticAnalyzer/Checkers/SelectorExtras.h
@@ -11,48 +11,30 @@
 #define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_SELECTOREXTRAS_H
 
 #include "clang/AST/ASTContext.h"
-#include <cstdarg>
 
 namespace clang {
 namespace ento {
 
-static inline Selector getKeywordSelectorImpl(ASTContext &Ctx,
-                                              const char *First,
-                                              va_list argp) {
-  SmallVector<IdentifierInfo*, 10> II;
-  II.push_back(&Ctx.Idents.get(First));
-
-  while (const char *s = va_arg(argp, const char *))
-    II.push_back(&Ctx.Idents.get(s));
+/// Build the Selector made of the given keywords, one identifier per argument
+/// and in argument order. Passing no keyword at all is rejected at compile
+/// time.
+template <typename... IdentifierInfos>
+static inline Selector getKeywordSelector(ASTContext &Ctx,
+                                          IdentifierInfos *... IIs) {
+  static_assert(sizeof...(IdentifierInfos) > 0,
+                "keyword selectors must have at least one argument");
+  SmallVector<IdentifierInfo *, 10> II({&Ctx.Idents.get(IIs)...});
 
   return Ctx.Selectors.getSelector(II.size(), &II[0]);
 }
 
-static inline Selector getKeywordSelector(ASTContext &Ctx, va_list argp) {
-  const char *First = va_arg(argp, const char *);
-  assert(First && "keyword selectors must have at least one argument");
-  return getKeywordSelectorImpl(Ctx, First, argp);
-}
-
-LLVM_END_WITH_NULL
-static inline Selector getKeywordSelector(ASTContext &Ctx,
-                                          const char *First, ...) {
-  va_list argp;
-  va_start(argp, First);
-  Selector result = getKeywordSelectorImpl(Ctx, First, argp);
-  va_end(argp);
-  return result;
-}
-
-LLVM_END_WITH_NULL
+/// Set \p Sel to the selector for the given keywords unless it is already set.
+template <typename... IdentifierInfos>
 static inline void lazyInitKeywordSelector(Selector &Sel, ASTContext &Ctx,
-                                           const char *First, ...) {
+                                           IdentifierInfos *... IIs) {
   if (!Sel.isNull())
     return;
-  va_list argp;
-  va_start(argp, First);
-  Sel = getKeywordSelectorImpl(Ctx, First, argp);
-  va_end(argp);
+  Sel = getKeywordSelector(Ctx, IIs...);
 }
 
 static inline void lazyInitNullarySelector(Selector &Sel, ASTContext &Ctx,
diff --git a/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
index 93ad17cffb34..2f9f5d2d9cf8 100644
--- a/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
@@ -440,7 +440,10 @@ StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD,
   BasicValueFactory &BVF = SVB.getBasicValueFactory();
   initFunctionSummaries(BVF);
 
-  std::string Name = FD->getQualifiedNameAsString();
+  IdentifierInfo *II = FD->getIdentifier();
+  if (!II)
+    return None;
+  StringRef Name = II->getName();
   if (Name.empty() || !C.isCLibraryFunction(FD, Name))
     return None;
 
diff --git a/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
index 39d88bfda148..caf86b26b66d 100644
--- a/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
+++ b/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
@@ -447,6 +447,7 @@ bool ExprEngine::inlineCall(const CallEvent &Call, const Decl *D,
   Bldr.takeNodes(Pred);
 
   NumInlinedCalls++;
+  Engine.FunctionSummaries->bumpNumTimesInlined(D);
 
   // Mark the decl as visited.
   if (VisitedCallees)
@@ -868,8 +869,6 @@ bool ExprEngine::shouldInlineCall(const CallEvent &Call, const Decl *D,
       || IsRecursive))
     return false;
 
-  Engine.FunctionSummaries->bumpNumTimesInlined(D);
-
   return true;
 }
 
diff --git a/lib/Tooling/RefactoringCallbacks.cpp b/lib/Tooling/RefactoringCallbacks.cpp
index e900c23e4f64..9fd333ca554e 100644
--- a/lib/Tooling/RefactoringCallbacks.cpp
+++ b/lib/Tooling/RefactoringCallbacks.cpp
@@ -9,8 +9,13 @@
 //
 //
 //===----------------------------------------------------------------------===//
-#include "clang/Lex/Lexer.h"
 #include "clang/Tooling/RefactoringCallbacks.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Lex/Lexer.h"
+
+using llvm::StringError;
+using llvm::make_error;
 
 namespace clang {
 namespace tooling {
@@ -20,18 +25,62 @@ tooling::Replacements &RefactoringCallback::getReplacements() {
   return Replace;
 }
 
-static Replacement replaceStmtWithText(SourceManager &Sources,
-                                       const Stmt &From,
-                                       StringRef Text) {
-  return tooling::Replacement(Sources, CharSourceRange::getTokenRange(
-      From.getSourceRange()), Text);
+ASTMatchRefactorer::ASTMatchRefactorer(
+    std::map<std::string, Replacements> &FileToReplaces)
+    : FileToReplaces(FileToReplaces) {}
+
+void ASTMatchRefactorer::addDynamicMatcher(
+    const ast_matchers::internal::DynTypedMatcher &Matcher,
+    RefactoringCallback *Callback) {
+  MatchFinder.addDynamicMatcher(Matcher, Callback);
+  Callbacks.push_back(Callback);
 }
-static Replacement replaceStmtWithStmt(SourceManager &Sources,
-                                       const Stmt &From,
+
+class RefactoringASTConsumer : public ASTConsumer {
+public:
+  explicit RefactoringASTConsumer(ASTMatchRefactorer &Refactoring)
+      : Refactoring(Refactoring) {}
+
+  void HandleTranslationUnit(ASTContext &Context) override {
+    // The ASTMatchRefactorer is re-used between translation units. Clear each
+    // callback's replacements so that each Replacement is only emitted once.
+    for (const auto &Callback : Refactoring.Callbacks) {
+      Callback->getReplacements().clear();
+    }
+    Refactoring.MatchFinder.matchAST(Context);
+    for (const auto &Callback : Refactoring.Callbacks) {
+      for (const auto &Replacement : Callback->getReplacements()) {
+        llvm::Error Err =
+            Refactoring.FileToReplaces[Replacement.getFilePath()].add(
+                Replacement);
+        if (Err) {
+          llvm::errs() << "Skipping replacement " << Replacement.toString()
+                       << " due to this error:\n"
+                       << toString(std::move(Err)) << "\n";
+        }
+      }
+    }
+  }
+
+private:
+  ASTMatchRefactorer &Refactoring;
+};
+
+std::unique_ptr<ASTConsumer> ASTMatchRefactorer::newASTConsumer() {
+  return llvm::make_unique<RefactoringASTConsumer>(*this);
+}
+
+static Replacement replaceStmtWithText(SourceManager &Sources, const Stmt &From,
+                                       StringRef Text) {
+  return tooling::Replacement(
+      Sources, CharSourceRange::getTokenRange(From.getSourceRange()), Text);
+}
+static Replacement replaceStmtWithStmt(SourceManager &Sources, const Stmt &From,
                                        const Stmt &To) {
-  return replaceStmtWithText(Sources, From, Lexer::getSourceText(
-      CharSourceRange::getTokenRange(To.getSourceRange()),
-      Sources, LangOptions()));
+  return replaceStmtWithText(
+      Sources, From,
+      Lexer::getSourceText(CharSourceRange::getTokenRange(To.getSourceRange()),
+                           Sources, LangOptions()));
 }
 
 ReplaceStmtWithText::ReplaceStmtWithText(StringRef FromId, StringRef ToText)
@@ -103,5 +152,90 @@ void ReplaceIfStmtWithItsBody::run(
   }
 }
 
+ReplaceNodeWithTemplate::ReplaceNodeWithTemplate(
+    llvm::StringRef FromId, std::vector<TemplateElement> Template)
+    : FromId(FromId), Template(std::move(Template)) {}
+
+llvm::Expected<std::unique_ptr<ReplaceNodeWithTemplate>>
+ReplaceNodeWithTemplate::create(StringRef FromId, StringRef ToTemplate) {
+  std::vector<TemplateElement> ParsedTemplate;
+  for (size_t Index = 0; Index < ToTemplate.size();) {
+    if (ToTemplate[Index] == '$') {
+      if (ToTemplate.substr(Index, 2) == "$$") {
+        Index += 2;
+        ParsedTemplate.push_back(
+            TemplateElement{TemplateElement::Literal, "$"});
+      } else if (ToTemplate.substr(Index, 2) == "${") {
+        size_t EndOfIdentifier = ToTemplate.find("}", Index);
+        if (EndOfIdentifier == std::string::npos) {
+          return make_error<StringError>(
+              "Unterminated ${...} in replacement template near " +
+                  ToTemplate.substr(Index),
+              llvm::inconvertibleErrorCode());
+        }
+        std::string SourceNodeName =
+            ToTemplate.substr(Index + 2, EndOfIdentifier - Index - 2);
+        ParsedTemplate.push_back(
+            TemplateElement{TemplateElement::Identifier, SourceNodeName});
+        Index = EndOfIdentifier + 1;
+      } else {
+        return make_error<StringError>(
+            "Invalid $ in replacement template near " +
+                ToTemplate.substr(Index),
+            llvm::inconvertibleErrorCode());
+      }
+    } else {
+      size_t NextIndex = ToTemplate.find('$', Index + 1);
+      ParsedTemplate.push_back(
+          TemplateElement{TemplateElement::Literal,
+                          ToTemplate.substr(Index, NextIndex - Index)});
+      Index = NextIndex;
+    }
+  }
+  return std::unique_ptr<ReplaceNodeWithTemplate>(
+      new ReplaceNodeWithTemplate(FromId, std::move(ParsedTemplate)));
+}
+
+void ReplaceNodeWithTemplate::run(
+    const ast_matchers::MatchFinder::MatchResult &Result) {
+  const auto &NodeMap = Result.Nodes.getMap();
+
+  std::string ToText;
+  for (const auto &Element : Template) {
+    switch (Element.Type) {
+    case TemplateElement::Literal:
+      ToText += Element.Value;
+      break;
+    case TemplateElement::Identifier: {
+      auto NodeIter = NodeMap.find(Element.Value);
+      if (NodeIter == NodeMap.end()) {
+        llvm::errs() << "Node " << Element.Value
+                     << " used in replacement template not bound in Matcher \n";
+        llvm::report_fatal_error("Unbound node in replacement template.");
+      }
+      CharSourceRange Source =
+          CharSourceRange::getTokenRange(NodeIter->second.getSourceRange());
+      ToText += Lexer::getSourceText(Source, *Result.SourceManager,
+                                     Result.Context->getLangOpts());
+      break;
+    }
+    }
+  }
+  if (NodeMap.count(FromId) == 0) {
+    llvm::errs() << "Node to be replaced " << FromId
+                 << " not bound in query.\n";
+    llvm::report_fatal_error("FromId node not bound in MatchResult");
+  }
+  auto Replacement =
+      tooling::Replacement(*Result.SourceManager, &NodeMap.at(FromId), ToText,
+                           Result.Context->getLangOpts());
+  llvm::Error Err = Replace.add(Replacement);
+  if (Err) {
+    llvm::errs() << "Query and replace failed in " << Replacement.getFilePath()
+                 << "! " << llvm::toString(std::move(Err)) << "\n";
+    llvm::report_fatal_error("Replacement failed");
+  }
+}
+
 } // end namespace tooling
 } // end namespace clang
diff --git a/test/Analysis/builtin-assume.c b/test/Analysis/builtin-assume.c
new file mode 100644
index 000000000000..00d651d9e3be
--- /dev/null
+++ b/test/Analysis/builtin-assume.c
@@ -0,0 +1,8 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify %s
+
+void clang_analyzer_eval(int);
+
+void f(int i) {
+  __builtin_assume(i < 10);
+  clang_analyzer_eval(i < 15); // expected-warning {{TRUE}}
+}
diff --git a/test/CXX/drs/dr20xx.cpp b/test/CXX/drs/dr20xx.cpp
new file mode 100644
index 000000000000..b97a9a46bc85
--- /dev/null
+++ b/test/CXX/drs/dr20xx.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=c++98 -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors \
+// RUN:            -Wno-variadic-macros -Wno-c11-extensions
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors
+// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors
+// RUN: %clang_cc1 -std=c++1z -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors
+
+// expected-no-diagnostics
+
+#if __cplusplus < 201103L
+#define static_assert(...) _Static_assert(__VA_ARGS__)
+#endif
+
+namespace dr2094 { // dr2094: 5.0
+  struct A { int n; };
+  struct B { volatile int n; };
+  static_assert(__is_trivially_copyable(volatile int), "");
+  static_assert(__is_trivially_copyable(const volatile int), "");
+  static_assert(__is_trivially_copyable(const volatile int[]), "");
+  static_assert(__is_trivially_copyable(A), "");
+  static_assert(__is_trivially_copyable(volatile A), "");
+  static_assert(__is_trivially_copyable(const volatile A), "");
+  static_assert(__is_trivially_copyable(const volatile A[]), "");
+  static_assert(__is_trivially_copyable(B), "");
+
+  static_assert(__is_trivially_constructible(A, A const&), "");
+  static_assert(__is_trivially_constructible(B, B const&), "");
+
+  static_assert(__is_trivially_assignable(A, const A&), "");
+  static_assert(__is_trivially_assignable(B, const B&), "");
+}
diff --git a/test/CXX/drs/dr4xx.cpp b/test/CXX/drs/dr4xx.cpp
index 3ea226a745f6..a55bb91be558 100644
--- a/test/CXX/drs/dr4xx.cpp
+++ b/test/CXX/drs/dr4xx.cpp
@@ -1202,16 +1202,15 @@ namespace dr495 { // dr495: 3.5
   long n2 = s2;
 }
 
-namespace dr496 { // dr496: no
+namespace dr496 { // dr496: sup dr2094
   struct A { int n; };
   struct B { volatile int n; };
   int check1[ __is_trivially_copyable(const int) ? 1 : -1];
-  int check2[!__is_trivially_copyable(volatile int) ? 1 : -1];
+  // This checks the dr2094 behavior, not dr496
+  int check2[ __is_trivially_copyable(volatile int) ? 1 : -1];
   int check3[ __is_trivially_constructible(A, const A&) ? 1 : -1];
-  // FIXME: This is wrong.
   int check4[ __is_trivially_constructible(B, const B&) ? 1 : -1];
   int check5[ __is_trivially_assignable(A, const A&) ? 1 : -1];
-  // FIXME: This is wrong.
   int check6[ __is_trivially_assignable(B, const B&) ? 1 : -1];
 }
 
diff --git a/test/CodeCompletion/member-access.cpp b/test/CodeCompletion/member-access.cpp
index 66872272ee6d..53af121951bb 100644
--- a/test/CodeCompletion/member-access.cpp
+++ b/test/CodeCompletion/member-access.cpp
@@ -66,3 +66,83 @@ struct Bar {
 
 // Make sure this also doesn't crash
 // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:47:14 %s
+
+
+template<typename T>
+class BaseTemplate {
+public:
+  T baseTemplateFunction();
+
+  T baseTemplateField;
+};
+
+template<typename T, typename S>
+class TemplateClass: public Base1 , public BaseTemplate<T> {
+public:
+  T function() { }
+  T field;
+
+  void overload1(const T &);
+  void overload1(const S &);
+};
+
+template<typename T, typename S>
+void completeDependentMembers(TemplateClass<T, S> &object,
+                              TemplateClass<int, S> *object2) {
+  object.field;
+  object2->field;
+// CHECK-CC2: baseTemplateField : [#T#][#BaseTemplate<T>::#]baseTemplateField
+// CHECK-CC2: baseTemplateFunction : [#T#][#BaseTemplate<T>::#]baseTemplateFunction()
+// CHECK-CC2: field : [#T#]field
+// CHECK-CC2: function : [#T#]function()
+// CHECK-CC2: member1 : [#int#][#Base1::#]member1
+// CHECK-CC2: member2 : [#float#][#Base1::#]member2
+// CHECK-CC2: overload1 : [#void#]overload1(<#const T &#>)
+// CHECK-CC2: overload1 : [#void#]overload1(<#const S &#>)
+
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:92:10 %s -o - | FileCheck -check-prefix=CHECK-CC2 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:93:12 %s -o - | FileCheck -check-prefix=CHECK-CC2 %s
+}
+
+
+void completeDependentSpecializedMembers(TemplateClass<int, double> &object,
+                                         TemplateClass<int, double> *object2) {
+  object.field;
+  object2->field;
+// CHECK-CC3: baseTemplateField : [#int#][#BaseTemplate<int>::#]baseTemplateField
+// CHECK-CC3: baseTemplateFunction : [#int#][#BaseTemplate<int>::#]baseTemplateFunction()
+// CHECK-CC3: field : [#int#]field
+// CHECK-CC3: function : [#int#]function()
+// CHECK-CC3: member1 : [#int#][#Base1::#]member1
+// CHECK-CC3: member2 : [#float#][#Base1::#]member2
+// CHECK-CC3: overload1 : [#void#]overload1(<#const int &#>)
+// CHECK-CC3: overload1 : [#void#]overload1(<#const double &#>)
+
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:110:10 %s -o - | FileCheck -check-prefix=CHECK-CC3 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:111:12 %s -o - | FileCheck -check-prefix=CHECK-CC3 %s
+}
+
+template <typename T>
+class Template {
+public:
+  BaseTemplate<int> o1;
+  BaseTemplate<T> o2;
+
+  void function() {
+    o1.baseTemplateField;
+// CHECK-CC4: BaseTemplate : BaseTemplate::
+// CHECK-CC4: baseTemplateField : [#int#]baseTemplateField
+// CHECK-CC4: baseTemplateFunction : [#int#]baseTemplateFunction()
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:132:8 %s -o - | FileCheck -check-prefix=CHECK-CC4 %s
+    o2.baseTemplateField;
+// CHECK-CC5: BaseTemplate : BaseTemplate::
+// CHECK-CC5: baseTemplateField : [#T#]baseTemplateField
+// CHECK-CC5: baseTemplateFunction : [#T#]baseTemplateFunction()
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:137:8 %s -o - | FileCheck -check-prefix=CHECK-CC5 %s
+    this->o1;
+// CHECK-CC6: [#void#]function()
+// CHECK-CC6: o1 : [#BaseTemplate<int>#]o1
+// CHECK-CC6: o2 : [#BaseTemplate<T>#]o2
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:142:11 %s -o - | FileCheck -check-prefix=CHECK-CC6 %s
+  }
+};
diff --git a/test/CodeGen/asan-globals-gc.cpp b/test/CodeGen/asan-globals-gc.cpp
index 6d64f41dda8d..58ce5f067563 100644
--- a/test/CodeGen/asan-globals-gc.cpp
+++ b/test/CodeGen/asan-globals-gc.cpp
@@ -1,5 +1,16 @@
-// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s --check-prefix=WITH-GC
-// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-windows-msvc -fdata-sections %s | FileCheck %s --check-prefix=WITH-GC
+// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-globals-dead-stripping -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefix=WITHOUT-GC
+// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-globals-dead-stripping -fdata-sections -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefix=WITH-GC
+// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-globals-dead-stripping -fno-integrated-as -fdata-sections -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefix=WITHOUT-GC
+// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-globals-dead-stripping -fno-integrated-as -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefix=WITHOUT-GC
+// RUN: %clang_cc1 -fsanitize=address -fdata-sections -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefix=WITHOUT-GC
+
+// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-globals-dead-stripping -fno-data-sections -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s --check-prefix=WITH-GC
+// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-globals-dead-stripping -fdata-sections -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s --check-prefix=WITH-GC
+// RUN: %clang_cc1 -fsanitize=address -fdata-sections -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s --check-prefix=WITHOUT-GC
+
+// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-globals-dead-stripping -fno-data-sections -emit-llvm -o - -triple x86_64-apple-macosx11 %s | FileCheck %s --check-prefix=WITH-GC
+// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-globals-dead-stripping -fdata-sections -emit-llvm -o - -triple x86_64-apple-macosx11 %s | FileCheck %s --check-prefix=WITH-GC
+// RUN: %clang_cc1 -fsanitize=address -fdata-sections -emit-llvm -o - -triple x86_64-apple-macosx11 %s | FileCheck %s --check-prefix=WITHOUT-GC
 
 int global;
 
diff --git a/test/CodeGen/asan-no-globals-no-comdat.cpp b/test/CodeGen/asan-no-globals-no-comdat.cpp
new file mode 100644
index 000000000000..4637346f9c41
--- /dev/null
+++ b/test/CodeGen/asan-no-globals-no-comdat.cpp
@@ -0,0 +1,11 @@
+// Test that on Linux the asan constructor is placed in a comdat iff globals-gc
+// is on, even if there are no globals in the module.
+
+// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-globals-dead-stripping -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefix=WITHOUT-GC
+// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-globals-dead-stripping -fdata-sections -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefix=WITH-GC
+// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-globals-dead-stripping -fno-integrated-as -fdata-sections -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefix=WITHOUT-GC
+// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-globals-dead-stripping -fno-integrated-as -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefix=WITHOUT-GC
+// RUN: %clang_cc1 -fsanitize=address -fdata-sections -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefix=WITHOUT-GC
+
+// WITH-GC: define internal void @asan.module_ctor() comdat {
+// WITHOUT-GC: define internal void @asan.module_ctor() {
diff --git a/test/CodeGen/mips-aggregate-arg.c b/test/CodeGen/mips-aggregate-arg.c
new file mode 100644
index 000000000000..ccf30df7c22a
--- /dev/null
+++ b/test/CodeGen/mips-aggregate-arg.c
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple mipsel-unknown-linux-gnu -S -emit-llvm -o - %s | FileCheck -check-prefix=O32 %s
+// RUN: %clang_cc1 -triple mips64el-unknown-linux-gnu -S -emit-llvm -o - %s  -target-abi n32 | FileCheck -check-prefix=N32-N64 %s
+// RUN: %clang_cc1 -triple mips64el-unknown-linux-gnu -S -emit-llvm -o - %s  -target-abi n64 | FileCheck -check-prefix=N32-N64 %s
+
+struct t1 {
+  char t1[10];
+};
+
+struct t2 {
+  char t2[20];
+};
+
+struct t3 {
+  char t3[65];
+};
+
+extern struct t1 g1;
+extern struct t2 g2;
+extern struct t3 g3;
+extern void f1(struct t1);
+extern void f2(struct t2);
+extern void f3(struct t3);
+
+void f() {
+
+// O32:  call void @f1(i32 inreg %{{[0-9]+}}, i32 inreg %{{[0-9]+}}, i16 inreg %{{[0-9]+}})
+// O32:  call void @f2(%struct.t2* byval align 4 %{{.*}})
+// O32:  call void @f3(%struct.t3* byval align 4 %{{.*}})
+
+// N32-N64:  call void @f1(i64 inreg %{{[0-9]+}}, i16 inreg %{{[0-9]+}})
+// N32-N64:  call void @f2(i64 inreg %{{[0-9]+}}, i64 inreg %{{[0-9]+}}, i32 inreg %{{[0-9]+}})
+// N32-N64:  call void @f3(%struct.t3* byval align 8 %{{.*}})
+
+  f1(g1);
+  f2(g2);
+  f3(g3);
+}
+
diff --git a/test/CodeGen/sanitize-recover.c b/test/CodeGen/sanitize-recover.c
index d714d58c7f26..6358d9d04aa1 100644
--- a/test/CodeGen/sanitize-recover.c
+++ b/test/CodeGen/sanitize-recover.c
@@ -7,12 +7,12 @@
 void test() {
   extern volatile unsigned x, y, z;
 
-  // RECOVER: uadd.with.overflow.i32
-  // RECOVER: ubsan_handle_add_overflow(
+  // RECOVER: uadd.with.overflow.i32{{.*}}, !nosanitize
+  // RECOVER: ubsan_handle_add_overflow({{.*}}, !nosanitize
   // RECOVER-NOT: unreachable
-  // ABORT: uadd.with.overflow.i32
-  // ABORT: ubsan_handle_add_overflow_abort(
-  // ABORT: unreachable
+  // ABORT: uadd.with.overflow.i32{{.*}}, !nosanitize
+  // ABORT: ubsan_handle_add_overflow_abort({{.*}}, !nosanitize
+  // ABORT: unreachable{{.*}}, !nosanitize
   x = y + z;
 }
 
diff --git a/test/CodeGen/sparcv8-inline-asm.c b/test/CodeGen/sparcv8-inline-asm.c
new file mode 100644
index 000000000000..711a2a0afbb0
--- /dev/null
+++ b/test/CodeGen/sparcv8-inline-asm.c
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple sparc-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+
+// CHECK: define float @fabsf(float %a)
+// CHECK: %{{.*}} = call float asm sideeffect "fabss $1, $0;", "=e,f"(float %{{.*}}) #1
+float fabsf(float a) {
+  float res;
+  __asm __volatile__("fabss  %1, %0;"
+                     : /* reg out*/ "=e"(res)
+                     : /* reg in */ "f"(a));
+  return res;
+}
diff --git a/test/CodeGen/thinlto_backend.ll b/test/CodeGen/thinlto_backend.ll
index 813bb62c1a29..86f30c0374fc 100644
--- a/test/CodeGen/thinlto_backend.ll
+++ b/test/CodeGen/thinlto_backend.ll
@@ -12,10 +12,10 @@
 ; RUN: %clang -O2 -o %t4.o -x ir %t1.o -c -fthinlto-index=bad.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR1
 ; CHECK-ERROR1: Error loading index file 'bad.thinlto.bc'
 
-; Ensure we ignore empty index file under -ignore-empty-index-file, and run
-; non-ThinLTO compilation which would not import f2
+; Ensure we ignore empty index file, and run non-ThinLTO compilation which
+; would not import f2
 ; RUN: touch %t4.thinlto.bc
-; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t1.o -c -fthinlto-index=%t4.thinlto.bc -mllvm -ignore-empty-index-file
+; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t1.o -c -fthinlto-index=%t4.thinlto.bc
 ; RUN: llvm-nm %t4.o | FileCheck --check-prefix=CHECK-OBJ-IGNORE-EMPTY %s
 ; CHECK-OBJ-IGNORE-EMPTY: T f1
 ; CHECK-OBJ-IGNORE-EMPTY: U f2
diff --git a/test/CodeGen/x86_64-mno-sse.c b/test/CodeGen/x86_64-mno-sse.c
new file mode 100644
index 000000000000..43a695ae3cd3
--- /dev/null
+++ b/test/CodeGen/x86_64-mno-sse.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -triple x86_64-linux -target-feature -sse -target-feature -sse2 -S -o /dev/null -verify %s
+// REQUIRES: x86-registered-target
+
+double f1(void) { // expected-error {{SSE register return with SSE disabled}}
+  return 1.4;
+}
+extern double g;
+void f2(void) { // expected-error {{SSE register return with SSE disabled}}
+  g = f1();
+}
+void take_double(double);
+void pass_double(void) {
+  // FIXME: Still asserts.
+  //take_double(1.5);
+}
diff --git a/test/CodeGen/xray-customevent.cpp b/test/CodeGen/xray-customevent.cpp
new file mode 100644
index 000000000000..359d92df938a
--- /dev/null
+++ b/test/CodeGen/xray-customevent.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -fxray-instrument -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: @_Z16alwaysInstrumentv
+[[clang::xray_always_instrument]] void alwaysInstrument() {
+  static constexpr char kPhase[] = "instrument";
+  __xray_customevent(kPhase, 10);
+  // CHECK: call void @llvm.xray.customevent(i8*{{.*}}, i32 10)
+}
+
+// CHECK-LABEL: @_Z15neverInstrumentv
+[[clang::xray_never_instrument]] void neverInstrument() {
+  static constexpr char kPhase[] = "never";
+  __xray_customevent(kPhase, 5);
+  // CHECK-NOT: call void @llvm.xray.customevent(i8*{{.*}}, i32 5)
+}
+
+// CHECK-LABEL: @_Z21conditionalInstrumenti
+[[clang::xray_always_instrument]] void conditionalInstrument(int v) {
+  static constexpr char kTrue[] = "true";
+  static constexpr char kUntrue[] = "untrue";
+  if (v % 2)
+    __xray_customevent(kTrue, 4);
+  else
+    __xray_customevent(kUntrue, 6);
+
+  // CHECK: call void @llvm.xray.customevent(i8*{{.*}}, i32 4)
+  // CHECK: call void @llvm.xray.customevent(i8*{{.*}}, i32 6)
+}
diff --git a/test/CodeGenCXX/array-default-argument.cpp b/test/CodeGenCXX/array-default-argument.cpp
new file mode 100644
index 000000000000..a07e3908392a
--- /dev/null
+++ b/test/CodeGenCXX/array-default-argument.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -emit-llvm -o - %s -triple %itanium_abi_triple | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -o - %s -triple %itanium_abi_triple -std=c++98 -fexceptions -fcxx-exceptions | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EH
+
+struct A {
+  A();
+  ~A();
+};
+
+struct B {
+  B(A = A());
+  ~B();
+};
+
+void f();
+// CHECK-LABEL: define void @_Z1gv()
+void g() {
+  // CHECK: br label %[[LOOP:.*]]
+
+  // [[LOOP]]:
+  // CHECK: {{call|invoke}} {{.*}} @_ZN1AC1Ev([[TEMPORARY:.*]])
+  // CHECK-EH:  unwind label %[[PARTIAL_ARRAY_LPAD:.*]]
+  // CHECK: {{call|invoke}} {{.*}} @_ZN1BC1E1A({{.*}}, [[TEMPORARY]])
+  // CHECK-EH:  unwind label %[[A_AND_PARTIAL_ARRAY_LPAD:.*]]
+  // CHECK: {{call|invoke}} {{.*}} @_ZN1AD1Ev([[TEMPORARY]])
+  // CHECK-EH:  unwind label %[[PARTIAL_ARRAY_LPAD]]
+  // CHECK: getelementptr {{.*}}, i{{[0-9]*}} 1
+  // CHECK: icmp eq
+  // CHECK: br i1 {{.*}} label %[[LOOP]]
+  B b[5];
+
+  // CHECK: {{call|invoke}} void @_Z1fv()
+  f();
+
+  // CHECK-NOT: @_ZN1AD1Ev(
+  // CHECK: {{call|invoke}} {{.*}} @_ZN1BD1Ev(
+}
diff --git a/test/CodeGenCXX/linetable-virtual-variadic.cpp b/test/CodeGenCXX/linetable-virtual-variadic.cpp
index 6f966416867a..cd746cdfdfe2 100644
--- a/test/CodeGenCXX/linetable-virtual-variadic.cpp
+++ b/test/CodeGenCXX/linetable-virtual-variadic.cpp
@@ -12,8 +12,10 @@ void Derived::VariadicFunction(...) { }
 
 // CHECK: define void @_ZN7Derived16VariadicFunctionEz({{.*}} !dbg ![[SP:[0-9]+]]
 // CHECK: ret void, !dbg ![[LOC:[0-9]+]]
-// CHECK-LABEL: define void @_ZT{{.+}}N7Derived16VariadicFunctionEz(
-// CHECK: ret void, !dbg ![[LOC:[0-9]+]]
+// CHECK: define void @_ZT{{.+}}N7Derived16VariadicFunctionEz({{.*}} !dbg ![[SP_I:[0-9]+]]
+// CHECK: ret void, !dbg ![[LOC_I:[0-9]+]]
 //
 // CHECK: ![[SP]] = distinct !DISubprogram(name: "VariadicFunction"
 // CHECK: ![[LOC]] = !DILocation({{.*}}scope: ![[SP]])
+// CHECK: ![[SP_I]] = distinct !DISubprogram(name: "VariadicFunction"
+// CHECK: ![[LOC_I]] = !DILocation({{.*}}scope: ![[SP_I]])
diff --git a/test/CodeGenCXX/vla.cpp b/test/CodeGenCXX/vla.cpp
index 4e22bba7d719..957a9f9568b3 100644
--- a/test/CodeGenCXX/vla.cpp
+++ b/test/CodeGenCXX/vla.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-darwin %s -emit-llvm -o - | FileCheck %s
 
 template<typename T>
 struct S {
@@ -54,3 +54,60 @@ void test0(void *array, int n) {
 
   // CHECK-NEXT: ret void
 }
+
+
+void test2(int b) {
+  // CHECK-LABEL: define void {{.*}}test2{{.*}}(i32 %b)
+  int varr[b];
+  // get the address of %b by checking the first store that stores it 
+  //CHECK: store i32 %b, i32* [[PTR_B:%.*]]
+
+  // get the size of the VLA by getting the first load of the PTR_B
+  //CHECK: [[VLA_NUM_ELEMENTS_PREZEXT:%.*]] = load i32, i32* [[PTR_B]]
+  //CHECK-NEXT: [[VLA_NUM_ELEMENTS_PRE:%.*]] = zext i32 [[VLA_NUM_ELEMENTS_PREZEXT]]
+  
+  b = 15;
+  //CHECK: store i32 15, i32* [[PTR_B]]
+  
+  // Now get the sizeof, and then divide by the element size
+  
+  
+  //CHECK: [[VLA_SIZEOF:%.*]] = mul nuw i64 4, [[VLA_NUM_ELEMENTS_PRE]]
+  //CHECK-NEXT: [[VLA_NUM_ELEMENTS_POST:%.*]] = udiv i64 [[VLA_SIZEOF]], 4
+  //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds i32, i32* {{%.*}}, i64 [[VLA_NUM_ELEMENTS_POST]]
+  //CHECK-NEXT: store i32* [[VLA_END_PTR]], i32** %__end
+  for (int d : varr) 0;
+}
+
+void test3(int b, int c) {
+  // CHECK-LABEL: define void {{.*}}test3{{.*}}(i32 %b, i32 %c)
+  int varr[b][c];
+  // get the address of %b by checking the first store that stores it 
+  //CHECK: store i32 %b, i32* [[PTR_B:%.*]]
+  //CHECK-NEXT: store i32 %c, i32* [[PTR_C:%.*]]
+  
+  // get the dimensions of the VLA by getting the first loads of PTR_B and PTR_C
+  //CHECK: [[VLA_DIM1_PREZEXT:%.*]] = load i32, i32* [[PTR_B]]
+  //CHECK-NEXT: [[VLA_DIM1_PRE:%.*]] = zext i32 [[VLA_DIM1_PREZEXT]]
+  //CHECK: [[VLA_DIM2_PREZEXT:%.*]] = load i32, i32* [[PTR_C]]
+  //CHECK-NEXT: [[VLA_DIM2_PRE:%.*]] = zext i32 [[VLA_DIM2_PREZEXT]]
+  
+  b = 15;
+  c = 15;
+  //CHECK: store i32 15, i32* [[PTR_B]]
+  //CHECK: store i32 15, i32* [[PTR_C]]
+  // Now get the sizeof, and then divide by the element size
+  
+  // multiply the two dimensions, then by the element size, and then divide by the sizeof dim2
+  //CHECK: [[VLA_DIM1_X_DIM2:%.*]] = mul nuw i64 [[VLA_DIM1_PRE]], [[VLA_DIM2_PRE]]
+  //CHECK-NEXT: [[VLA_SIZEOF:%.*]] = mul nuw i64 4, [[VLA_DIM1_X_DIM2]]
+  //CHECK-NEXT: [[VLA_SIZEOF_DIM2:%.*]] = mul nuw i64 4, [[VLA_DIM2_PRE]]
+  //CHECK-NEXT: [[VLA_NUM_ELEMENTS:%.*]] = udiv i64 [[VLA_SIZEOF]], [[VLA_SIZEOF_DIM2]]
+  //CHECK-NEXT: [[VLA_END_INDEX:%.*]] = mul nsw i64 [[VLA_NUM_ELEMENTS]], [[VLA_DIM2_PRE]]
+  //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds i32, i32* {{%.*}}, i64 [[VLA_END_INDEX]]
+  //CHECK-NEXT: store i32* [[VLA_END_PTR]], i32** %__end
+ 
+  for (auto &d : varr) 0;
+}
+
+
diff --git a/test/CodeGenObjC/arc-blocks.m b/test/CodeGenObjC/arc-blocks.m
index 69cd7bb297b2..b84d141037e0 100644
--- a/test/CodeGenObjC/arc-blocks.m
+++ b/test/CodeGenObjC/arc-blocks.m
@@ -752,6 +752,16 @@ void test19(void (^b)(void)) {
 // CHECK-NEXT: call void @objc_release(i8* [[X]])
 // CHECK-NEXT: ret void
 
+// CHECK-UNOPT-LABEL: define void @test20(
+// CHECK-UNOPT: [[XADDR:%.*]] = alloca i8*
+// CHECK-UNOPT-NEXT: [[BLOCK:%.*]] = alloca <[[BLOCKTY:.*]]>
+// CHECK-UNOPT: [[CAPTUREFIELD:%.*]] = getelementptr inbounds <[[BLOCKTY]]>, <[[BLOCKTY]]>* [[BLOCK]], i32 0, i32 5
+// CHECK-UNOPT: [[BLOCKCAPTURED:%.*]] = getelementptr inbounds <[[BLOCKTY]]>, <[[BLOCKTY]]>* [[BLOCK]], i32 0, i32 5
+// CHECK-UNOPT: [[CAPTURED:%.*]] = load i8*, i8** [[XADDR]]
+// CHECK-UNOPT: [[RETAINED:%.*]] = call i8* @objc_retain(i8* [[CAPTURED]])
+// CHECK-UNOPT: store i8* [[RETAINED]], i8** [[BLOCKCAPTURED]]
+// CHECK-UNOPT: call void @objc_storeStrong(i8** [[CAPTUREFIELD]], i8* null)
+
 // CHECK-LABEL: define internal void @__copy_helper_block
 // CHECK: [[BLOCKSOURCE:%.*]] = bitcast i8* %{{.*}} to <[[BLOCKTY]]>*
 // CHECK: [[CAPTUREFIELD:%.*]] = getelementptr inbounds <[[BLOCKTY]]>, <[[BLOCKTY]]>* [[BLOCKSOURCE]], i32 0, i32 5
diff --git a/test/CodeGenObjC/arc-foreach.m b/test/CodeGenObjC/arc-foreach.m
index e3b3f1aa5ce4..77cb068187fa 100644
--- a/test/CodeGenObjC/arc-foreach.m
+++ b/test/CodeGenObjC/arc-foreach.m
@@ -68,7 +68,8 @@ void test0(NSArray *array) {
 // CHECK-LP64:      [[D0:%.*]] = getelementptr inbounds [[BLOCK_T]], [[BLOCK_T]]* [[BLOCK]], i32 0, i32 5
 // CHECK-LP64:      [[T0:%.*]] = getelementptr inbounds [[BLOCK_T]], [[BLOCK_T]]* [[BLOCK]], i32 0, i32 5
 // CHECK-LP64-NEXT: [[T1:%.*]] = load i8*, i8** [[X]]
-// CHECK-LP64-NEXT: store i8* [[T1]], i8** [[T0]]
+// CHECK-LP64-NEXT: [[T2:%.*]] = call i8* @objc_retain(i8* [[T1]])
+// CHECK-LP64-NEXT: store i8* [[T2]], i8** [[T0]]
 // CHECK-LP64-NEXT: [[BLOCK1:%.*]] = bitcast [[BLOCK_T]]* [[BLOCK]]
 // CHECK-LP64-NEXT: call void @use_block(void ()* [[BLOCK1]])
 // CHECK-LP64-NEXT: call void @objc_storeStrong(i8** [[D0]], i8* null)
@@ -209,7 +210,8 @@ NSArray *array4;
 // CHECK-LP64:         [[T0:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [[TY]]* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [[TY]]* }>* [[BLOCK]], i32 0, i32 5
 // CHECK-LP64:         [[BC:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [[TY]]* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [[TY]]* }>* [[BLOCK]], i32 0, i32 5
 // CHECK-LP64:         [[T1:%.*]] = load [[TY]]*, [[TY]]** [[SELF_ADDR]]
-// CHECK-LP64:         store [[TY]]* [[T1]], [[TY]]** [[BC]], align 8
+// CHECK-LP64:         [[T2:%.*]] = bitcast [[TY]]* [[T1]] to i8*
+// CHECK-LP64:         call i8* @objc_retain(i8* [[T2]])
 
 // CHECK-LP64-OPT-LABEL: define internal void @"\01-[I1 foo2]"(
 // CHECK-LP64-OPT: [[TY:%.*]]* %self
diff --git a/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl b/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
index c0b30095a679..7baee5eee391 100644
--- a/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
+++ b/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
@@ -58,16 +58,16 @@ kernel void kernel1(
   // CHECK-DAG: !DILocalVariable(name: "FuncVar4", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]])
   int *FuncVar4 = Tmp1;
 
-  // CHECK-DAG: !DILocalVariable(name: "FuncVar5", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]])
-  global int *constant FuncVar5 = KernelArg0;
-  // CHECK-DAG: !DILocalVariable(name: "FuncVar6", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]])
-  constant int *constant FuncVar6 = KernelArg1;
-  // CHECK-DAG: !DILocalVariable(name: "FuncVar7", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_LOCAL]])
-  local int *constant FuncVar7 = KernelArg2;
-  // CHECK-DAG: !DILocalVariable(name: "FuncVar8", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_PRIVATE]])
-  private int *constant FuncVar8 = Tmp0;
-  // CHECK-DAG: !DILocalVariable(name: "FuncVar9", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]])
-  int *constant FuncVar9 = Tmp1;
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar5", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: true, isDefinition: true)
+  global int *constant FuncVar5 = 0;
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar6", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: true, isDefinition: true)
+  constant int *constant FuncVar6 = 0;
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar7", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_LOCAL]], isLocal: true, isDefinition: true)
+  local int *constant FuncVar7 = 0;
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar8", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_PRIVATE]], isLocal: true, isDefinition: true)
+  private int *constant FuncVar8 = 0;
+  // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar9", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: true, isDefinition: true)
+  int *constant FuncVar9 = 0;
 
   // CHECK-DAG: distinct !DIGlobalVariable(name: "FuncVar10", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: ![[DWARF_ADDRESS_SPACE_NONE]], isLocal: true, isDefinition: true)
   global int *local FuncVar10; FuncVar10 = KernelArg0;
diff --git a/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl b/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
index a962d3c75aaf..c0a4c21ce760 100644
--- a/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
+++ b/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
@@ -80,21 +80,21 @@ kernel void kernel1(
   // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)** {{.*}}, metadata ![[FUNCVAR4]], metadata ![[PRIVATE]]), !dbg !{{[0-9]+}}
   int *FuncVar4 = Tmp1;
 
-  // CHECK-DAG: ![[FUNCVAR5:[0-9]+]] = !DILocalVariable(name: "FuncVar5", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)** {{.*}}, metadata ![[FUNCVAR5]], metadata ![[NONE:[0-9]+]]), !dbg !{{[0-9]+}}
-  global int *constant FuncVar5 = KernelArg0;
-  // CHECK-DAG: ![[FUNCVAR6:[0-9]+]] = !DILocalVariable(name: "FuncVar6", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(2)** {{.*}}, metadata ![[FUNCVAR6]], metadata ![[NONE]]), !dbg !{{[0-9]+}}
-  constant int *constant FuncVar6 = KernelArg1;
-  // CHECK-DAG: ![[FUNCVAR7:[0-9]+]] = !DILocalVariable(name: "FuncVar7", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)** {{.*}}, metadata ![[FUNCVAR7]], metadata ![[NONE]]), !dbg !{{[0-9]+}}
-  local int *constant FuncVar7 = KernelArg2;
-  // CHECK-DAG: ![[FUNCVAR8:[0-9]+]] = !DILocalVariable(name: "FuncVar8", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32** {{.*}}, metadata ![[FUNCVAR8]], metadata ![[NONE]]), !dbg !{{[0-9]+}}
-  private int *constant FuncVar8 = Tmp0;
-  // CHECK-DAG: ![[FUNCVAR9:[0-9]+]] = !DILocalVariable(name: "FuncVar9", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
-  // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)** {{.*}}, metadata ![[FUNCVAR9]], metadata ![[NONE]]), !dbg !{{[0-9]+}}
-  int *constant FuncVar9 = Tmp1;
+  // CHECK-DAG: ![[FUNCVAR5:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar5", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR5]])
+  global int *constant FuncVar5 = 0;
+  // CHECK-DAG: ![[FUNCVAR6:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar6", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR6]])
+  constant int *constant FuncVar6 = 0;
+  // CHECK-DAG: ![[FUNCVAR7:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar7", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR7]])
+  local int *constant FuncVar7 = 0;
+  // CHECK-DAG: ![[FUNCVAR8:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar8", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR8]])
+  private int *constant FuncVar8 = 0;
+  // CHECK-DAG: ![[FUNCVAR9:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar9", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
+  // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR9]])
+  int *constant FuncVar9 = 0;
 
   // CHECK-DAG: ![[FUNCVAR10:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar10", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
   // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR10]], expr: ![[LOCAL]])
diff --git a/test/CodeGenOpenCL/constant-addr-space-globals.cl b/test/CodeGenOpenCL/constant-addr-space-globals.cl
index 4f0d1ea23e56..7bb970527c26 100644
--- a/test/CodeGenOpenCL/constant-addr-space-globals.cl
+++ b/test/CodeGenOpenCL/constant-addr-space-globals.cl
@@ -11,10 +11,11 @@ kernel void test(global float *out) {
 // but create a copy in the original address space (unless a variable itself is
 // in the constant address space).
 
-void foo(constant const int *p1, const int *p2, const int *p3);
+void foo(constant int* p, constant const int *p1, const int *p2, const int *p3);
 // CHECK: @k.arr1 = internal addrspace(2) constant [3 x i32] [i32 1, i32 2, i32 3]
 // CHECK: @k.arr2 = private unnamed_addr addrspace(2) constant [3 x i32] [i32 4, i32 5, i32 6]
 // CHECK: @k.arr3 = private unnamed_addr addrspace(2) constant [3 x i32] [i32 7, i32 8, i32 9]
+// CHECK: @k.var1 = internal addrspace(2) constant i32 1
 kernel void k(void) {
   // CHECK-NOT: %arr1 = alloca [3 x i32]
   constant const int arr1[] = {1, 2, 3};
@@ -23,5 +24,8 @@ kernel void k(void) {
   // CHECK: %arr3 = alloca [3 x i32]
   int arr3[] = {7, 8, 9};
 
-  foo(arr1, arr2, arr3);
+  constant int var1 = 1;
+  
+  // CHECK: call spir_func void @foo(i32 addrspace(2)* @k.var1, i32 addrspace(2)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(2)* @k.arr1, i32 0, i32 0)
+  foo(&var1, arr1, arr2, arr3);
 }
diff --git a/test/Driver/fsanitize.c b/test/Driver/fsanitize.c
index 05e239c74243..41f573aa7316 100644
--- a/test/Driver/fsanitize.c
+++ b/test/Driver/fsanitize.c
@@ -30,7 +30,7 @@
 // RUN: %clang -target x86_64-pc-win32 -fsanitize-coverage=bb %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-COVERAGE-WIN64
 // CHECK-COVERAGE-WIN64: "--dependent-lib={{[^"]*}}ubsan_standalone-x86_64.lib"
 
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=integer %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-INTEGER
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=integer %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-INTEGER -implicit-check-not="-fsanitize-address-use-after-scope"
 // CHECK-INTEGER: "-fsanitize={{((signed-integer-overflow|unsigned-integer-overflow|integer-divide-by-zero|shift-base|shift-exponent),?){5}"}}
 
 // RUN: %clang -fsanitize=bounds -### -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix=CHECK-BOUNDS
@@ -126,6 +126,13 @@
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-WITHOUT-USE-AFTER-SCOPE
 // CHECK-ASAN-WITHOUT-USE-AFTER-SCOPE: -cc1{{.*}}address-use-after-scope
 
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-address-globals-dead-stripping %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-GLOBALS
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ASAN-GLOBALS
+// RUN: %clang_cl --target=x86_64-windows-msvc -fsanitize=address -fsanitize-address-globals-dead-stripping -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-GLOBALS
+// RUN: %clang_cl --target=x86_64-windows-msvc -fsanitize=address -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-GLOBALS
+// CHECK-ASAN-GLOBALS: -cc1{{.*}}-fsanitize-address-globals-dead-stripping
+// CHECK-NO-ASAN-GLOBALS-NOT: -cc1{{.*}}-fsanitize-address-globals-dead-stripping
+
 // RUN: %clang -target x86_64-linux-gnu -fsanitize-memory-track-origins -pie %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ONLY-TRACK-ORIGINS
 // CHECK-ONLY-TRACK-ORIGINS: warning: argument unused during compilation: '-fsanitize-memory-track-origins'
 
diff --git a/test/Driver/myriad-toolchain.c b/test/Driver/myriad-toolchain.c
index 971e5d3e6961..7bd215bf1a48 100644
--- a/test/Driver/myriad-toolchain.c
+++ b/test/Driver/myriad-toolchain.c
@@ -54,9 +54,11 @@
 // -fno-split-dwarf-inlining is consumed but not passed to moviCompile.
 // RUN: %clang -target shave-myriad -c -### %s -g -fno-inline-functions \
 // RUN: -fno-inline-functions-called-once -Os -Wall -MF dep.d -fno-split-dwarf-inlining \
-// RUN: -ffunction-sections 2>&1 | FileCheck %s -check-prefix=PASSTHRU_OPTIONS
+// RUN: -ffunction-sections -Xclang -xclangflag -mllvm -llvm-flag 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=PASSTHRU_OPTIONS
 // PASSTHRU_OPTIONS: "-g" "-fno-inline-functions" "-fno-inline-functions-called-once"
 // PASSTHRU_OPTIONS: "-Os" "-Wall" "-MF" "dep.d" "-ffunction-sections"
+// PASSTHRU_OPTIONS: "-Xclang" "-xclangflag" "-mllvm" "-llvm-flag"
 
 // RUN: %clang -target shave-myriad -c %s -o foo.o -### -MD -MF dep.d 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=MDMF
diff --git a/test/Driver/wasm-toolchain.c b/test/Driver/wasm-toolchain.c
index d0b029303891..3be60df92672 100644
--- a/test/Driver/wasm-toolchain.c
+++ b/test/Driver/wasm-toolchain.c
@@ -27,18 +27,18 @@
 
 // RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s 2>&1 | FileCheck -check-prefix=LINK %s
 // LINK: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
-// LINK: lld{{.*}}" "-flavor" "ld" "-L/foo/lib32" "crt1.o" "crti.o" "[[temp]]" "-lc" "-lcompiler_rt" "crtn.o" "-o" "a.out"
+// LINK: lld{{.*}}" "-flavor" "wasm" "-L/foo/lib32" "crt1.o" "crti.o" "[[temp]]" "-lc" "-lcompiler_rt" "crtn.o" "-o" "a.out"
 
 // A basic C link command-line with optimization. WebAssembly is somewhat
 // special in enabling --gc-sections by default.
 
 // RUN: %clang -### -O2 -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s 2>&1 | FileCheck -check-prefix=LINK_OPT %s
 // LINK_OPT: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
-// LINK_OPT: lld{{.*}}" "-flavor" "ld" "--gc-sections" "-L/foo/lib32" "crt1.o" "crti.o" "[[temp]]" "-lc" "-lcompiler_rt" "crtn.o" "-o" "a.out"
+// LINK_OPT: lld{{.*}}" "-flavor" "wasm" "--gc-sections" "-L/foo/lib32" "crt1.o" "crti.o" "[[temp]]" "-lc" "-lcompiler_rt" "crtn.o" "-o" "a.out"
 
 // Ditto, but ensure that a user --no-gc-sections comes after the
 // default --gc-sections.
 
 // RUN: %clang -### -O2 -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo -Wl,--no-gc-sections %s 2>&1 | FileCheck -check-prefix=NO_GC_SECTIONS %s
 // NO_GC_SECTIONS: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
-// NO_GC_SECTIONS: lld{{.*}}" "-flavor" "ld" "--gc-sections" "-L/foo/lib32" "crt1.o" "crti.o" "--no-gc-sections" "[[temp]]" "-lc" "-lcompiler_rt" "crtn.o" "-o" "a.out"
+// NO_GC_SECTIONS: lld{{.*}}" "-flavor" "wasm" "--gc-sections" "-L/foo/lib32" "crt1.o" "crti.o" "--no-gc-sections" "[[temp]]" "-lc" "-lcompiler_rt" "crtn.o" "-o" "a.out"
diff --git a/test/FixIt/fixit-availability.c b/test/FixIt/fixit-availability.c
index fa641b4b98c6..038dee08b13c 100644
--- a/test/FixIt/fixit-availability.c
+++ b/test/FixIt/fixit-availability.c
@@ -5,6 +5,6 @@ int function(void);
 
 void use() {
   function();
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (__builtin_available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (__builtin_available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:14-[[@LINE-2]]:14}:"\n  } else {\n      // Fallback on earlier versions\n  }"
 }
diff --git a/test/FixIt/fixit-availability.mm b/test/FixIt/fixit-availability.mm
index 6bf8f49ddc0d..d044a73efdb9 100644
--- a/test/FixIt/fixit-availability.mm
+++ b/test/FixIt/fixit-availability.mm
@@ -7,58 +7,58 @@ void anotherFunction(int function);
 
 int use() {
   function();
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:14-[[@LINE-2]]:14}:"\n  } else {\n      // Fallback on earlier versions\n  }"
   int y = function(), x = 0;
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:29-[[@LINE-2]]:29}:"\n  } else {\n      // Fallback on earlier versions\n  }"
   x += function();
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:19-[[@LINE-2]]:19}:"\n  } else {\n      // Fallback on earlier versions\n  }"
   if (1) {
     x = function();
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:20-[[@LINE-2]]:20}:"\n  } else {\n      // Fallback on earlier versions\n  }"
   }
   anotherFunction(function());
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:31-[[@LINE-2]]:31}:"\n  } else {\n      // Fallback on earlier versions\n  }"
   if (function()) {
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE+1]]:4-[[@LINE+1]]:4}:"\n  } else {\n      // Fallback on earlier versions\n  }"
   }
   while (function())
-    // CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macos 10.12, *)) {\n      "
+    // CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macOS 10.12, *)) {\n      "
     // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE+1]]:6-[[@LINE+1]]:6}:"\n  } else {\n      // Fallback on earlier versions\n  }"
     ;
   do
     function();
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macos 10.12, *)) {\n        "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macOS 10.12, *)) {\n        "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:16-[[@LINE-2]]:16}:"\n    } else {\n        // Fallback on earlier versions\n    }"
   while (1);
   for (int i = 0; i < 10; ++i)
     function();
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macos 10.12, *)) {\n        "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macOS 10.12, *)) {\n        "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:16-[[@LINE-2]]:16}:"\n    } else {\n        // Fallback on earlier versions\n    }"
   switch (x) {
   case 0:
     function();
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macos 10.12, *)) {\n        "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macOS 10.12, *)) {\n        "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:16-[[@LINE-2]]:16}:"\n    } else {\n        // Fallback on earlier versions\n    }"
   case 2:
     anotherFunction(1);
     function();
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macos 10.12, *)) {\n        "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macOS 10.12, *)) {\n        "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:16-[[@LINE-2]]:16}:"\n    } else {\n        // Fallback on earlier versions\n    }"
     break;
   default:
     function();
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macos 10.12, *)) {\n        "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macOS 10.12, *)) {\n        "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:16-[[@LINE-2]]:16}:"\n    } else {\n        // Fallback on earlier versions\n    }"
     break;
   }
   return function();
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:21-[[@LINE-2]]:21}:"\n  } else {\n      // Fallback on earlier versions\n  }"
 }
 
@@ -72,39 +72,39 @@ int use() {
 
 void useInMacros() {
   MYFUNCTION();
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:16-[[@LINE-2]]:16}:"\n  } else {\n      // Fallback on earlier versions\n  }"
 
   MACRO_ARGUMENT_SEMI(function())
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:34-[[@LINE-2]]:34}:"\n  } else {\n      // Fallback on earlier versions\n  }"
   MACRO_ARGUMENT(function());
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:30-[[@LINE-2]]:30}:"\n  } else {\n      // Fallback on earlier versions\n  }"
   MACRO_ARGUMENT_2(function());
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:32-[[@LINE-2]]:32}:"\n  } else {\n      // Fallback on earlier versions\n  }"
 
   INNER_MACRO
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:14-[[@LINE-2]]:14}:"\n  } else {\n      // Fallback on earlier versions\n  }"
 }
 
 void wrapDeclStmtUses() {
   int x = 0, y = function();
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macos 10.12, *)) {\n      "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:3-[[@LINE-1]]:3}:"if (@available(macOS 10.12, *)) {\n      "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE+13]]:22-[[@LINE+13]]:22}:"\n  } else {\n      // Fallback on earlier versions\n  }"
   {
     int z = function();
     if (z) {
 
     }
-// CHECK: fix-it:{{.*}}:{[[@LINE-4]]:5-[[@LINE-4]]:5}:"if (@available(macos 10.12, *)) {\n        "
+// CHECK: fix-it:{{.*}}:{[[@LINE-4]]:5-[[@LINE-4]]:5}:"if (@available(macOS 10.12, *)) {\n        "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:6-[[@LINE-2]]:6}:"\n    } else {\n        // Fallback on earlier versions\n    }"
   }
   if (y)
     int z = function();
-// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macos 10.12, *)) {\n        "
+// CHECK: fix-it:{{.*}}:{[[@LINE-1]]:5-[[@LINE-1]]:5}:"if (@available(macOS 10.12, *)) {\n        "
 // CHECK-NEXT: fix-it:{{.*}}:{[[@LINE-2]]:24-[[@LINE-2]]:24}:"\n    } else {\n        // Fallback on earlier versions\n    }"
   anotherFunction(y);
   anotherFunction(x);
diff --git a/test/Import/conflicting-struct/Inputs/S1.cpp b/test/Import/conflicting-struct/Inputs/S1.cpp
new file mode 100644
index 000000000000..a99dba8c7821
--- /dev/null
+++ b/test/Import/conflicting-struct/Inputs/S1.cpp
@@ -0,0 +1,6 @@
+class T;
+
+class S { // T is only forward-declared in this input, so S can hold it by pointer only.
+  T *t;
+  int a; // assigned by expr() in ../test.cpp
+};
diff --git a/test/Import/conflicting-struct/Inputs/S2.cpp b/test/Import/conflicting-struct/Inputs/S2.cpp
new file mode 100644
index 000000000000..de2cb6cd03c7
--- /dev/null
+++ b/test/Import/conflicting-struct/Inputs/S2.cpp
@@ -0,0 +1,7 @@
+class U { // Embedded by value in T, defined later in this input.
+  int b; // assigned through T::u by expr() in ../test.cpp
+};
+
+class T { // Full definition of the T that Inputs/S1.cpp only forward-declares.
+  U u;
+};
diff --git a/test/Import/conflicting-struct/test.cpp b/test/Import/conflicting-struct/test.cpp
new file mode 100644
index 000000000000..5aad567cd794
--- /dev/null
+++ b/test/Import/conflicting-struct/test.cpp
@@ -0,0 +1,7 @@
+// RUN: clang-import-test --import %S/Inputs/S1.cpp --import %S/Inputs/S2.cpp -expression %s
+void expr() { // S and T are not declared in this file; the command on the first line imports Inputs/S1.cpp and Inputs/S2.cpp.
+  S MyS;
+  T MyT;
+  MyS.a = 3; // 'a' is declared in Inputs/S1.cpp
+  MyT.u.b = 2; // goes through T::u to U::b, both declared in Inputs/S2.cpp
+}
diff --git a/test/Index/Core/index-dependent-source.cpp b/test/Index/Core/index-dependent-source.cpp
new file mode 100644
index 000000000000..8c097b935a37
--- /dev/null
+++ b/test/Index/Core/index-dependent-source.cpp
@@ -0,0 +1,143 @@
+// RUN: c-index-test core -print-source-symbols -- %s -std=c++14 -target x86_64-apple-macosx10.7 | FileCheck %s
+
+int invalid;
+
+class Base { // Non-template base class of TemplateClass.
+  void baseFunction();
+
+  int baseField;
+
+  static void staticBaseFunction();
+};
+
+template<typename T>
+class BaseTemplate { // TemplateClass derives from BaseTemplate<T>, i.e. a base that depends on T.
+public:
+  T baseTemplateFunction();
+
+  T baseTemplateField;
+
+  static T baseTemplateVariable;
+};
+
+template<typename T, typename S>
+class TemplateClass: public Base , public BaseTemplate<T> { // One plain base and one base that depends on T.
+public:
+  ~TemplateClass();
+
+  T function() { }
+
+  static void staticFunction() { }
+
+  T field;
+
+  static T variable;
+
+  struct Struct { };
+
+  enum Enum { EnumValue };
+
+  using TypeAlias = S;
+  typedef T Typedef;
+
+  void overload1(const T &); // Two overloads with the same name, differing only in parameter type.
+  void overload1(const S &);
+};
+
+template<typename T, typename S>
+void indexSimpleDependentDeclarations(const TemplateClass<T, S> &object) { // Each statement is followed by the FileCheck expectation for its own line.
+  // Valid instance members:
+  object.function();
+// CHECK: [[@LINE-1]]:10 | instance-method/C++ | function | c:@ST>2#T#T@TemplateClass@F@function# | <no-cgname> | Ref,Call,RelCall,RelCont | rel: 1
+  object.field;
+// CHECK: [[@LINE-1]]:10 | field/C++ | field | c:@ST>2#T#T@TemplateClass@FI@field | <no-cgname> | Ref,RelCont | rel: 1
+  object.baseFunction();
+// CHECK: [[@LINE-1]]:10 | instance-method/C++ | baseFunction | c:@S@Base@F@baseFunction# | __ZN4Base12baseFunctionEv | Ref,Call,RelCall,RelCont | rel: 1
+  object.baseField;
+// CHECK: [[@LINE-1]]:10 | field/C++ | baseField | c:@S@Base@FI@baseField | <no-cgname> | Ref,RelCont | rel: 1
+  object.baseTemplateFunction();
+// CHECK: [[@LINE-1]]:10 | instance-method/C++ | baseTemplateFunction | c:@ST>1#T@BaseTemplate@F@baseTemplateFunction# | <no-cgname> | Ref,Call,RelCall,RelCont | rel: 1
+  object.baseTemplateField;
+// CHECK: [[@LINE-1]]:10 | field/C++ | baseTemplateField | c:@ST>1#T@BaseTemplate@FI@baseTemplateField | <no-cgname> | Ref,RelCont | rel: 1
+
+  // Invalid instance members:
+  object.variable;
+// CHECK-NOT: [[@LINE-1]]:10
+  object.staticFunction();
+// CHECK-NOT: [[@LINE-1]]:10
+  object.Struct;
+// CHECK-NOT: [[@LINE-1]]:10
+  object.EnumValue;
+// CHECK-NOT: [[@LINE-1]]:10
+
+  // Valid static members:
+  TemplateClass<T, S>::staticFunction();
+// CHECK: [[@LINE-1]]:24 | static-method/C++ | staticFunction | c:@ST>2#T#T@TemplateClass@F@staticFunction#S | <no-cgname> | Ref,Call,RelCall,RelCont | rel: 1
+  TemplateClass<T, S>::variable;
+// CHECK: [[@LINE-1]]:24 | static-property/C++ | variable | c:@ST>2#T#T@TemplateClass@variable | __ZN13TemplateClass8variableE | Ref,RelCont | rel: 1
+  TemplateClass<T, S>::staticBaseFunction();
+// CHECK: [[@LINE-1]]:24 | static-method/C++ | staticBaseFunction | c:@S@Base@F@staticBaseFunction#S | __ZN4Base18staticBaseFunctionEv | Ref,Call,RelCall,RelCont | rel: 1
+  TemplateClass<T, S>::baseTemplateVariable;
+// CHECK: [[@LINE-1]]:24 | static-property/C++ | baseTemplateVariable | c:@ST>1#T@BaseTemplate@baseTemplateVariable | __ZN12BaseTemplate20baseTemplateVariableE | Ref,RelCont | rel: 1
+  TemplateClass<T, S>::EnumValue;
+// CHECK: [[@LINE-1]]:24 | enumerator/C | EnumValue | c:@ST>2#T#T@TemplateClass@E@Enum@EnumValue | <no-cgname> | Ref,RelCont | rel: 1
+  TemplateClass<T, S>::Struct();
+// CHECK: [[@LINE-1]]:24 | struct/C | Struct | c:@ST>2#T#T@TemplateClass@S@Struct | <no-cgname> | Ref,Call,RelCall,RelCont | rel: 1
+
+  // Invalid static members:
+  TemplateClass<T, S>::field;
+// CHECK-NOT: [[@LINE-1]]:24
+  TemplateClass<T, S>::function();
+// CHECK-NOT: [[@LINE-1]]:24
+
+  // Valid type names:
+  typename TemplateClass<T, S>::Struct Val;
+// CHECK: [[@LINE-1]]:33 | struct/C | Struct | c:@ST>2#T#T@TemplateClass@S@Struct | <no-cgname> | Ref,RelCont | rel: 1
+  typename TemplateClass<T, S>::Enum EnumVal;
+// CHECK: [[@LINE-1]]:33 | enum/C | Enum | c:@ST>2#T#T@TemplateClass@E@Enum | <no-cgname> | Ref,RelCont | rel: 1
+  typename TemplateClass<T, S>::TypeAlias Val2;
+// CHECK: [[@LINE-1]]:33 | type-alias/C++ | TypeAlias | c:@ST>2#T#T@TemplateClass@TypeAlias | <no-cgname> | Ref,RelCont | rel: 1
+  typename TemplateClass<T, S>::Typedef Val3;
+// CHECK: [[@LINE-1]]:33 | type-alias/C | Typedef | c:{{.*}}index-dependent-source.cpp@ST>2#T#T@TemplateClass@T@Typedef | <no-cgname> | Ref,RelCont | rel: 1
+
+  // Invalid type names:
+  typename TemplateClass<T, S>::field Val4;
+// CHECK-NOT: [[@LINE-1]]:33
+  typename TemplateClass<T, S>::staticFunction Val5;
+// CHECK-NOT: [[@LINE-1]]:33
+
+
+  object.invalid; // 'invalid' is a global int at the top of this file, not a member of TemplateClass.
+// CHECK-NOT: [[@LINE-1]]:10
+  TemplateClass<T, S>::invalid;
+// CHECK-NOT: [[@LINE-1]]:24
+}
+
+template<typename T, typename S, typename Y>
+void indexDependentOverloads(const TemplateClass<T, S> &object) { // No output is expected for any call to the overloaded member below.
+  object.overload1(T());
+// CHECK-NOT: [[@LINE-1]]
+  object.overload1(S());
+// CHECK-NOT: [[@LINE-1]]
+  object.overload1(Y()); // Y matches neither declared parameter type (const T &, const S &).
+// CHECK-NOT: [[@LINE-1]]
+}
+
+template<typename T> struct UndefinedTemplateClass;
+
+template<typename T>
+void undefinedTemplateLookup(UndefinedTemplateClass<T> &x) { // UndefinedTemplateClass is declared above but never defined.
+// Shouldn't crash!
+  x.lookup;
+  typename UndefinedTemplateClass<T>::Type y;
+}
+
+template<typename T>
+struct UserOfUndefinedTemplateClass: UndefinedTemplateClass<T> { }; // Defined itself, but its only base is the undefined template.
+
+template<typename T>
+void undefinedTemplateLookup2(UserOfUndefinedTemplateClass<T> &x) { // Same lookups as above, but the undefined template is reached through a base class.
+// Shouldn't crash!
+  x.lookup;
+  typename UserOfUndefinedTemplateClass<T>::Type y;
+}
diff --git a/test/Index/Core/index-instantiated-source.cpp b/test/Index/Core/index-instantiated-source.cpp
new file mode 100644
index 000000000000..f61795da540c
--- /dev/null
+++ b/test/Index/Core/index-instantiated-source.cpp
@@ -0,0 +1,39 @@
+// RUN: c-index-test core -print-source-symbols -- %s -std=c++14 -target x86_64-apple-macosx10.7 | FileCheck %s
+// References to declarations in instantiations should be canonicalized:
+
+template<typename T>
+class BaseTemplate { // Base of TemplateClass; its members are referenced through TemplateClass<int, float> further down.
+public:
+  T baseTemplateFunction();
+// CHECK: [[@LINE-1]]:5 | instance-method/C++ | baseTemplateFunction | c:@ST>1#T@BaseTemplate@F@baseTemplateFunction#
+
+  T baseTemplateField;
+// CHECK: [[@LINE-1]]:5 | field/C++ | baseTemplateField | c:@ST>1#T@BaseTemplate@FI@baseTemplateField
+};
+
+template<typename T, typename S>
+class TemplateClass: public BaseTemplate<T> { // Used below as TemplateClass<int, float>.
+public:
+  T function() { return T(); }
+// CHECK: [[@LINE-1]]:5 | instance-method/C++ | function | c:@ST>2#T#T@TemplateClass@F@function#
+
+  static void staticFunction() { }
+// CHECK: [[@LINE-1]]:15 | static-method/C++ | staticFunction | c:@ST>2#T#T@TemplateClass@F@staticFunction#S
+
+  T field;
+// CHECK: [[@LINE-1]]:5 | field/C++ | field | c:@ST>2#T#T@TemplateClass@FI@field
+};
+
+void canonicalizeInstaniationReferences(TemplateClass<int, float> &object) { // The expected USRs below are the same template-level ones used at the declarations above.
+  (void)object.function();
+// CHECK: [[@LINE-1]]:16 | instance-method/C++ | function | c:@ST>2#T#T@TemplateClass@F@function# | <no-cgname>
+  (void)object.field;
+// CHECK: [[@LINE-1]]:16 | field/C++ | field | c:@ST>2#T#T@TemplateClass@FI@field | <no-cgname> | Ref,RelCont | rel: 1
+  (void)object.baseTemplateFunction();
+// CHECK: [[@LINE-1]]:16 | instance-method/C++ | baseTemplateFunction | c:@ST>1#T@BaseTemplate@F@baseTemplateFunction# | <no-cgname>
+  (void)object.baseTemplateField;
+// CHECK: [[@LINE-1]]:16 | field/C++ | baseTemplateField | c:@ST>1#T@BaseTemplate@FI@baseTemplateField | <no-cgname> | Ref,RelCont | rel: 1
+
+  TemplateClass<int, float>::staticFunction();
+// CHECK: [[@LINE-1]]:30 | static-method/C++ | staticFunction | c:@ST>2#T#T@TemplateClass@F@staticFunction#S | <no-cgname>
+}
diff --git a/test/Index/Core/index-source.cpp b/test/Index/Core/index-source.cpp
index 6f902610e673..9248e86ff697 100644
--- a/test/Index/Core/index-source.cpp
+++ b/test/Index/Core/index-source.cpp
@@ -287,3 +287,55 @@ class PartialSpecilizationClass<Cls, Cls> : Cls { };
 // CHECK-NEXT: [[@LINE-5]]:7 | class(Gen)/C++ | PartialSpecilizationClass | c:@ST>2#T#T@PartialSpecilizationClass | <no-cgname> | Ref | rel: 0
 // CHECK-NEXT: [[@LINE-6]]:33 | class/C++ | Cls | c:@S@Cls | <no-cgname> | Ref | rel: 0
 // CHECK-NEXT: [[@LINE-7]]:38 | class/C++ | Cls | c:@S@Cls | <no-cgname> | Ref | rel: 0
+
+template<typename T, int x>
+void functionSp() { }
+
+struct Record {
+  constexpr static int C = 2;
+};
+
+template<>
+void functionSp<SpecializationDecl<Cls>, Record::C>() {
+// CHECK: [[@LINE-1]]:6 | function(Gen,TS)/C++ | functionSp | c:@F@functionSp<#$@S@SpecializationDecl>#$@S@Cls#VI2># | __Z10functionSpI18SpecializationDeclI3ClsELi2EEvv | Def,RelSpecialization | rel: 1
+// CHECK:   RelSpecialization | functionSp | c:@FT@>2#T#NIfunctionSp#v#
+// CHECK: [[@LINE-3]]:17 | class(Gen)/C++ | SpecializationDecl | c:@ST>1#T@SpecializationDecl | <no-cgname> | Ref,RelCont | rel: 1
+// CHECK: [[@LINE-4]]:36 | class/C++ | Cls | c:@S@Cls | <no-cgname> | Ref,RelCont | rel: 1
+// CHECK: [[@LINE-5]]:50 | static-property/C++ | C | c:@S@Record@C | __ZN6Record1CE | Ref,RelCont | rel: 1
+// CHECK: [[@LINE-6]]:42 | struct/C++ | Record | c:@S@Record | <no-cgname> | Ref,RelCont | rel: 1
+}
+
+template<typename T, int x>
+class ClassWithCorrectSpecialization { };
+
+template<>
+class ClassWithCorrectSpecialization<SpecializationDecl<Cls>, Record::C> { };
+// CHECK: [[@LINE-1]]:38 | class(Gen)/C++ | SpecializationDecl | c:@ST>1#T@SpecializationDecl | <no-cgname> | Ref | rel: 0
+// CHECK: [[@LINE-2]]:57 | class/C++ | Cls | c:@S@Cls | <no-cgname> | Ref | rel: 0
+// CHECK: [[@LINE-3]]:71 | static-property/C++ | C | c:@S@Record@C | __ZN6Record1CE | Ref,Read | rel: 0
+// CHECK: [[@LINE-4]]:63 | struct/C++ | Record | c:@S@Record | <no-cgname> | Ref | rel: 0
+
+namespace ns {
+// CHECK: [[@LINE-1]]:11 | namespace/C++ | ns | c:@N@ns | <no-cgname> | Decl | rel: 0
+namespace inner {
+// CHECK: [[@LINE-1]]:11 | namespace/C++ | inner | c:@N@ns@N@inner | <no-cgname> | Decl,RelChild | rel: 1
+void func();
+
+}
+namespace innerAlias = inner;
+// CHECK: [[@LINE-1]]:11 | namespace-alias/C++ | innerAlias | c:@N@ns@NA@innerAlias | <no-cgname> | Decl,RelChild | rel: 1
+// CHECK: [[@LINE-2]]:24 | namespace/C++ | inner | c:@N@ns@N@inner | <no-cgname> | Ref,RelCont | rel: 1
+}
+
+namespace namespaceAlias = ::ns::innerAlias;
+// CHECK: [[@LINE-1]]:11 | namespace-alias/C++ | namespaceAlias | c:@NA@namespaceAlias | <no-cgname> | Decl | rel: 0
+// CHECK: [[@LINE-2]]:30 | namespace/C++ | ns | c:@N@ns | <no-cgname> | Ref,RelCont | rel: 1
+// CHECK: [[@LINE-3]]:34 | namespace-alias/C++ | innerAlias | c:@N@ns@NA@innerAlias | <no-cgname> | Ref,RelCont | rel: 1
+
+void ::ns::inner::func() {
+// CHECK: [[@LINE-1]]:8 | namespace/C++ | ns | c:@N@ns | <no-cgname> | Ref,RelCont | rel: 1
+// CHECK: [[@LINE-2]]:12 | namespace/C++ | inner | c:@N@ns@N@inner | <no-cgname> | Ref,RelCont | rel: 1
+  ns::innerAlias::func();
+// CHECK: [[@LINE-1]]:3 | namespace/C++ | ns | c:@N@ns | <no-cgname> | Ref,RelCont | rel: 1
+// CHECK: [[@LINE-2]]:7 | namespace-alias/C++ | innerAlias | c:@N@ns@NA@innerAlias | <no-cgname> | Ref,RelCont | rel: 1
+}
diff --git a/test/Index/complete-available.m b/test/Index/complete-available.m
new file mode 100644
index 000000000000..8267dbdddc5c
--- /dev/null
+++ b/test/Index/complete-available.m
@@ -0,0 +1,20 @@
+/* The run lines are below, because this test is line- and
+   column-number sensitive. */
+void atAvailable() {
+  if (@available(macOS 10.10, *)) {
+
+  }
+  if (__builtin_available(iOS 8, *)) {
+  }
+}
+
+// RUN: c-index-test -code-completion-at=%s:4:18 %s | FileCheck -check-prefix=CHECK %s
+// RUN: c-index-test -code-completion-at=%s:7:27 %s | FileCheck -check-prefix=CHECK %s
+// CHECK: {TypedText iOS} (40)
+// CHECK: {TypedText iOSApplicationExtension} (40)
+// CHECK: {TypedText macOS} (40)
+// CHECK: {TypedText macOSApplicationExtension} (40)
+// CHECK: {TypedText tvOS} (40)
+// CHECK: {TypedText tvOSApplicationExtension} (40)
+// CHECK: {TypedText watchOS} (40)
+// CHECK: {TypedText watchOSApplicationExtension} (40)
diff --git a/test/Index/get-cursor.m b/test/Index/get-cursor.m
index af277d45fdf4..e85d49fb0c3e 100644
--- a/test/Index/get-cursor.m
+++ b/test/Index/get-cursor.m
@@ -154,6 +154,12 @@ SomeT someVar;
 typedef MY_TYPE2(SomeT2) { int x; };
 SomeT2 someVar2;
 
+#define GEN_DECL(mod_name) __attribute__((external_source_symbol(language="Swift", defined_in=mod_name, generated_declaration)))
+
+GEN_DECL("some_module")
+@interface ExtCls
+-(void)method;
+@end
 
 // RUN: c-index-test -cursor-at=%s:4:28 -cursor-at=%s:5:28 %s | FileCheck -check-prefix=CHECK-PROP %s
 // CHECK-PROP: ObjCPropertyDecl=foo1:4:26
@@ -226,3 +232,8 @@ SomeT2 someVar2;
 // CHECK-TRANSPARENT: 147:1 TypeRef=TokenPaste_t:144:9 Extent=[147:1 - 147:13] Spelling=TokenPaste_t ([147:1 - 147:13])
 // CHECK-TRANSPARENT: 151:1 TypeRef=SomeT:150:17 (Transparent: struct SomeT) Extent=[151:1 - 151:6] Spelling=SomeT ([151:1 - 151:6])
 // CHECK-TRANSPARENT: 155:1 TypeRef=SomeT2:154:18 Extent=[155:1 - 155:7] Spelling=SomeT2 ([155:1 - 155:7])
+
+// RUN: c-index-test -cursor-at=%s:160:12 -cursor-at=%s:161:8 %s | FileCheck -check-prefix=CHECK-EXTERNAL %s
+// CHECK-EXTERNAL: 160:12 ObjCInterfaceDecl=ExtCls:160:12 (external lang: Swift, defined: some_module, gen: 1)
+// CHECK-EXTERNAL: 161:8 ObjCInstanceMethodDecl=method:161:8 (external lang: Swift, defined: some_module, gen: 1)
+C
\ No newline at end of file
diff --git a/test/Misc/warning-flags.c b/test/Misc/warning-flags.c
index be02e12e2cbb..5172d3b15a90 100644
--- a/test/Misc/warning-flags.c
+++ b/test/Misc/warning-flags.c
@@ -6,8 +6,8 @@ This test serves two purposes:
 (1) It documents all existing warnings that currently have no associated -W flag,
     and ensures that the list never grows.
 
-    If take an existing warning and add a flag, this test will fail.  To
-    fix this test, simply remove that warning from the list below.
+    If you take an existing warning and add a flag, this test will fail.
+    To fix this test, simply remove that warning from the list below.
 
 (2) It prevents us adding new warnings to Clang that have no -W flag.  All
     new warnings should have -W flags.
diff --git a/test/Modules/DebugInfoNamespace.cpp b/test/Modules/DebugInfoNamespace.cpp
new file mode 100644
index 000000000000..33add085d8a6
--- /dev/null
+++ b/test/Modules/DebugInfoNamespace.cpp
@@ -0,0 +1,19 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -x objective-c++ -std=c++11 -debug-info-kind=standalone \
+// RUN:     -dwarf-ext-refs -fmodules \
+// RUN:     -fmodule-format=obj -fimplicit-module-maps \
+// RUN:     -triple %itanium_abi_triple -fmodules-cache-path=%t \
+// RUN:     %s -I %S/Inputs/DebugInfoNamespace -I %t -emit-llvm -o - \
+// RUN:     |  FileCheck %s
+
+#include "A.h"
+#include "B.h"
+using namespace N;
+B b;
+
+// Verify that the forward decl of B is in module B.
+//
+// CHECK: !DICompositeType(tag: DW_TAG_structure_type, name: "B",
+// CHECK-SAME:             scope: ![[N:[0-9]+]]
+// CHECK: ![[N]] = !DINamespace(name: "N", scope: ![[B:[0-9]+]])
+// CHECK: ![[B]] = !DIModule(scope: null, name: "B",
diff --git a/test/Modules/DebugInfoSubmoduleImport.c b/test/Modules/DebugInfoSubmoduleImport.c
index 1b31aada9c6a..b608d300d5fe 100644
--- a/test/Modules/DebugInfoSubmoduleImport.c
+++ b/test/Modules/DebugInfoSubmoduleImport.c
@@ -2,6 +2,11 @@
 // RUN: %clang_cc1 -fmodules -fmodule-format=obj -debug-info-kind=limited -dwarf-ext-refs \
 // RUN:     -fimplicit-module-maps -x c -fmodules-cache-path=%t -I %S/Inputs \
 // RUN:     %s -emit-llvm -debugger-tuning=lldb -o - | FileCheck %s
+//
+// RUN: %clang_cc1 -fmodules -fmodule-format=obj -debug-info-kind=limited -dwarf-ext-refs \
+// RUN:     -fimplicit-module-maps -x c -fmodules-cache-path=%t -I %S/Inputs \
+// RUN:     -fmodules-local-submodule-visibility \
+// RUN:     %s -emit-llvm -debugger-tuning=lldb -o - | FileCheck %s
 #include "DebugSubmoduleA.h"
 #include "DebugSubmoduleB.h"
 
diff --git a/test/Modules/Inputs/DebugInfoNamespace/A.h b/test/Modules/Inputs/DebugInfoNamespace/A.h
new file mode 100644
index 000000000000..dc5a1cd26ab6
--- /dev/null
+++ b/test/Modules/Inputs/DebugInfoNamespace/A.h
@@ -0,0 +1,3 @@
+namespace N {
+  struct A {};
+}
diff --git a/test/Modules/Inputs/DebugInfoNamespace/B.h b/test/Modules/Inputs/DebugInfoNamespace/B.h
new file mode 100644
index 000000000000..c9033a54d4e5
--- /dev/null
+++ b/test/Modules/Inputs/DebugInfoNamespace/B.h
@@ -0,0 +1,3 @@
+namespace N {
+  struct B {};
+}
diff --git a/test/Modules/Inputs/DebugInfoNamespace/module.modulemap b/test/Modules/Inputs/DebugInfoNamespace/module.modulemap
new file mode 100644
index 000000000000..9300fcf98c64
--- /dev/null
+++ b/test/Modules/Inputs/DebugInfoNamespace/module.modulemap
@@ -0,0 +1,8 @@
+module A {
+  header "A.h"
+  export *
+}
+module B {
+  header "B.h"
+  export *
+}
diff --git a/test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/Headers/B.h b/test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/Headers/B.h
new file mode 100644
index 000000000000..761540b09cb3
--- /dev/null
+++ b/test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/Headers/B.h
@@ -0,0 +1 @@
+// B.h
diff --git a/test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/Headers/Sub.h b/test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/Headers/Sub.h
new file mode 100644
index 000000000000..fd86e3cf872f
--- /dev/null
+++ b/test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/Headers/Sub.h
@@ -0,0 +1,2 @@
+// Sub.h
+#import "B.h"
diff --git a/test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/PrivateHeaders/BPriv.h b/test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/PrivateHeaders/BPriv.h
new file mode 100644
index 000000000000..4ab49b798c63
--- /dev/null
+++ b/test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/PrivateHeaders/BPriv.h
@@ -0,0 +1 @@
+// BPriv.h
diff --git a/test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/PrivateHeaders/SubPriv.h b/test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/PrivateHeaders/SubPriv.h
new file mode 100644
index 000000000000..f6ac6188d65f
--- /dev/null
+++ b/test/Modules/Inputs/MainA.framework/Frameworks/Sub.framework/PrivateHeaders/SubPriv.h
@@ -0,0 +1 @@
+#import "BPriv.h"
diff --git a/test/Modules/Inputs/MainA.framework/Headers/A.h b/test/Modules/Inputs/MainA.framework/Headers/A.h
new file mode 100644
index 000000000000..975f1f0437bb
--- /dev/null
+++ b/test/Modules/Inputs/MainA.framework/Headers/A.h
@@ -0,0 +1 @@
+// A.h
diff --git a/test/Modules/Inputs/MainA.framework/Headers/Main.h b/test/Modules/Inputs/MainA.framework/Headers/Main.h
new file mode 100644
index 000000000000..cb8cc00a0c45
--- /dev/null
+++ b/test/Modules/Inputs/MainA.framework/Headers/Main.h
@@ -0,0 +1,2 @@
+// Main.h
+#import "A.h"
diff --git a/test/Modules/Inputs/MainA.framework/Modules/module.modulemap b/test/Modules/Inputs/MainA.framework/Modules/module.modulemap
new file mode 100644
index 000000000000..4b0b5e955386
--- /dev/null
+++ b/test/Modules/Inputs/MainA.framework/Modules/module.modulemap
@@ -0,0 +1,12 @@
+framework module MainA {
+  umbrella header "Main.h"
+
+  module * { export * }
+  export *
+
+  framework module Sub {
+    umbrella header "Sub.h"
+    module * { export * }
+    export *
+  }
+}
diff --git a/test/Modules/Inputs/MainA.framework/Modules/module.private.modulemap b/test/Modules/Inputs/MainA.framework/Modules/module.private.modulemap
new file mode 100644
index 000000000000..a8dc5c2be5f0
--- /dev/null
+++ b/test/Modules/Inputs/MainA.framework/Modules/module.private.modulemap
@@ -0,0 +1,12 @@
+framework module MainA_Private {
+  umbrella header "MainPriv.h"
+
+  module * { export * }
+  export *
+
+  explicit framework module Sub {
+    umbrella header "SubPriv.h"
+    module * { export * }
+    export *
+  }
+}
diff --git a/test/Modules/Inputs/MainA.framework/PrivateHeaders/APriv.h b/test/Modules/Inputs/MainA.framework/PrivateHeaders/APriv.h
new file mode 100644
index 000000000000..6ac683c39c55
--- /dev/null
+++ b/test/Modules/Inputs/MainA.framework/PrivateHeaders/APriv.h
@@ -0,0 +1 @@
+// APriv.h
diff --git a/test/Modules/Inputs/MainA.framework/PrivateHeaders/MainPriv.h b/test/Modules/Inputs/MainA.framework/PrivateHeaders/MainPriv.h
new file mode 100644
index 000000000000..68103017ad0b
--- /dev/null
+++ b/test/Modules/Inputs/MainA.framework/PrivateHeaders/MainPriv.h
@@ -0,0 +1 @@
+#import "APriv.h"
diff --git a/test/Modules/Inputs/SameHeader/A.h b/test/Modules/Inputs/SameHeader/A.h
new file mode 100644
index 000000000000..bebe9c31c28e
--- /dev/null
+++ b/test/Modules/Inputs/SameHeader/A.h
@@ -0,0 +1,3 @@
+#ifndef __A_h__
+#define __A_h__
+#endif
diff --git a/test/Modules/Inputs/SameHeader/B.h b/test/Modules/Inputs/SameHeader/B.h
new file mode 100644
index 000000000000..c3fe49cd854f
--- /dev/null
+++ b/test/Modules/Inputs/SameHeader/B.h
@@ -0,0 +1,4 @@
+#ifndef __B_h__
+#define __B_h__
+#include "C.h"
+#endif
diff --git a/test/Modules/Inputs/SameHeader/C.h b/test/Modules/Inputs/SameHeader/C.h
new file mode 100644
index 000000000000..33c3316a6517
--- /dev/null
+++ b/test/Modules/Inputs/SameHeader/C.h
@@ -0,0 +1,12 @@
+#ifndef __C_h__
+#define __C_h__
+int c = 1;
+
+struct aaa {
+  int b;
+};
+
+typedef struct fd_set {
+  char c;
+};
+#endif
diff --git a/test/Modules/Inputs/SameHeader/module.modulemap b/test/Modules/Inputs/SameHeader/module.modulemap
new file mode 100644
index 000000000000..d0283a71ac97
--- /dev/null
+++ b/test/Modules/Inputs/SameHeader/module.modulemap
@@ -0,0 +1,11 @@
+module X {
+  module A {
+    header "A.h"
+    export *
+  }
+  module B {
+    header "B.h"
+    export *
+  }
+  export *
+}
diff --git a/test/Modules/find-privateheaders.m b/test/Modules/find-privateheaders.m
index c5e82ac70da2..5720a73f9be8 100644
--- a/test/Modules/find-privateheaders.m
+++ b/test/Modules/find-privateheaders.m
@@ -1,2 +1,13 @@
-// RUN: %clang_cc1 -fmodules -fsyntax-only -F%S/Inputs %s
+// RUN: rm -rf %t.cache
+// RUN: %clang_cc1 -fmodules -fsyntax-only -F%S/Inputs -fimplicit-module-maps \
+// RUN:   -fmodules-cache-path=%t.cache -Wno-private-module -DBUILD_PUBLIC -verify %s
+// RUN: rm -rf %t.cache
+// RUN: %clang_cc1 -fmodules -fsyntax-only -F%S/Inputs -fimplicit-module-maps \
+// RUN:   -fmodules-cache-path=%t.cache -Wno-private-module -verify %s
+//expected-no-diagnostics
+
+#ifdef BUILD_PUBLIC
 #import "Main/Main.h"
+#else
+#import "MainA/MainPriv.h"
+#endif
diff --git a/test/Modules/odr_hash.cpp b/test/Modules/odr_hash.cpp
index 294e925627c6..58814dd6b3fb 100644
--- a/test/Modules/odr_hash.cpp
+++ b/test/Modules/odr_hash.cpp
@@ -1078,6 +1078,39 @@ S<X> s;
 #endif
 }
 
+namespace MultipleTypedefs {
+#if defined(FIRST)
+typedef int B1;
+typedef B1 A1;
+struct S1 {
+  A1 x;
+};
+#elif defined(SECOND)
+typedef int A1;
+struct S1 {
+  A1 x;
+};
+#else
+S1 s1;
+#endif
+
+#if defined(FIRST)
+struct T2 { int x; };
+typedef T2 B2;
+typedef B2 A2;
+struct S2 {
+  T2 x;
+};
+#elif defined(SECOND)
+struct T2 { int x; };
+typedef T2 A2;
+struct S2 {
+  T2 x;
+};
+#else
+S2 s2;
+#endif
+}
 
 // Keep macros contained to one file.
 #ifdef FIRST
diff --git a/test/Modules/preprocess-module.cpp b/test/Modules/preprocess-module.cpp
index a3b789238388..64af00c471df 100644
--- a/test/Modules/preprocess-module.cpp
+++ b/test/Modules/preprocess-module.cpp
@@ -19,6 +19,11 @@
 // RUN: %clang_cc1 -fmodules -fmodule-name=file -fmodule-file=%t/fwd.pcm -x c++-module-map-cpp-output %t/no-rewrite.ii -emit-module -o %t/no-rewrite.pcm
 // RUN: %clang_cc1 -fmodules -fmodule-name=file -fmodule-file=%t/fwd.pcm -x c++-module-map-cpp-output %t/rewrite.ii -emit-module -o %t/rewrite.pcm
 
+// Check that we can load the original module map in the same compilation (this
+// could happen if we had a redundant -fmodule-map-file= in the original
+// build).
+// RUN: %clang_cc1 -fmodules -fmodule-name=file -fmodule-file=%t/fwd.pcm -fmodule-map-file=%S/Inputs/preprocess/module.modulemap -x c++-module-map-cpp-output %t/rewrite.ii -emit-module -o /dev/null
+
 // Check the module we built works.
 // RUN: %clang_cc1 -fmodules -fmodule-file=%t/no-rewrite.pcm %s -verify
 // RUN: %clang_cc1 -fmodules -fmodule-file=%t/rewrite.pcm %s -verify
diff --git a/test/Modules/redefinition-same-header.m b/test/Modules/redefinition-same-header.m
new file mode 100644
index 000000000000..f1c6cbbcaa2e
--- /dev/null
+++ b/test/Modules/redefinition-same-header.m
@@ -0,0 +1,20 @@
+// RUN: rm -rf %t.tmp
+// RUN: %clang_cc1 -fsyntax-only -I %S/Inputs/SameHeader -fmodules \
+// RUN:   -fimplicit-module-maps -fmodules-cache-path=%t.tmp %s -verify
+
+// expected-error@Inputs/SameHeader/C.h:3 {{redefinition of 'c'}}
+// expected-note-re@Inputs/SameHeader/B.h:3 {{'{{.*}}C.h' included multiple times, additional include site in header from module 'X.B'}}
+// expected-note@Inputs/SameHeader/module.modulemap:6 {{X.B defined here}}
+// expected-note-re@redefinition-same-header.m:20 {{'{{.*}}C.h' included multiple times, additional include site here}}
+
+// expected-error@Inputs/SameHeader/C.h:5 {{redefinition of 'aaa'}}
+// expected-note-re@Inputs/SameHeader/B.h:3 {{'{{.*}}C.h' included multiple times, additional include site in header from module 'X.B'}}
+// expected-note@Inputs/SameHeader/module.modulemap:6 {{X.B defined here}}
+// expected-note-re@redefinition-same-header.m:20 {{'{{.*}}C.h' included multiple times, additional include site here}}
+
+// expected-error@Inputs/SameHeader/C.h:9 {{redefinition of 'fd_set'}}
+// expected-note-re@Inputs/SameHeader/B.h:3 {{'{{.*}}C.h' included multiple times, additional include site in header from module 'X.B'}}
+// expected-note@Inputs/SameHeader/module.modulemap:6 {{X.B defined here}}
+// expected-note-re@redefinition-same-header.m:20 {{'{{.*}}C.h' included multiple times, additional include site here}}
+#include "A.h" // maps to a modular import
+#include "C.h" // textual include
diff --git a/test/OpenMP/report_default_DSA.cpp b/test/OpenMP/report_default_DSA.cpp
new file mode 100644
index 000000000000..d14cd5cbe9a4
--- /dev/null
+++ b/test/OpenMP/report_default_DSA.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 %s
+
+void foo(int x, int n) {
+  double vec[n];
+  for (int iter = 0; iter < x; iter++) {
+#pragma omp target teams distribute parallel for map( \
+    from                                              \
+    : vec [0:n]) default(none)
+    // expected-error@+1 {{variable 'n' must have explicitly specified data sharing attributes}}
+    for (int ii = 0; ii < n; ii++) {
+      // expected-error@+3 {{variable 'iter' must have explicitly specified data sharing attributes}}
+      // expected-error@+2 {{variable 'vec' must have explicitly specified data sharing attributes}}
+      // expected-error@+1 {{variable 'x' must have explicitly specified data sharing attributes}}
+      vec[ii] = iter + ii + x;
+    }
+  }
+}
+
diff --git a/test/PCH/cxx-templates.cpp b/test/PCH/cxx-templates.cpp
index d50eee0623c5..e241701f50df 100644
--- a/test/PCH/cxx-templates.cpp
+++ b/test/PCH/cxx-templates.cpp
@@ -108,3 +108,11 @@ namespace cyclic_module_load {
 template int local_extern::f<int[]>(); // expected-note {{in instantiation of}}
 #endif
 template int local_extern::g<int[]>();
+
+namespace MemberSpecializationLocation {
+#ifndef NO_ERRORS
+  // expected-note@cxx-templates.h:* {{previous}}
+  template<> float A<int>::n; // expected-error {{redeclaration of 'n' with a different type}}
+#endif
+  int k = A<int>::n;
+}
diff --git a/test/PCH/cxx-templates.h b/test/PCH/cxx-templates.h
index c4a844727691..68b252e7974e 100644
--- a/test/PCH/cxx-templates.h
+++ b/test/PCH/cxx-templates.h
@@ -358,3 +358,6 @@ namespace rdar15468709c {
   }
 }
 
+namespace MemberSpecializationLocation {
+  template<typename T> struct A { static int n; };
+}
diff --git a/test/Parser/objc-available.m b/test/Parser/objc-available.m
index a170721240ce..49bc53930655 100644
--- a/test/Parser/objc-available.m
+++ b/test/Parser/objc-available.m
@@ -21,6 +21,12 @@ void f() {
   (void)@available; // expected-error{{expected '('}}
 }
 
+void prettyPlatformNames() {
+  (void)@available(iOS 8, tvOS 10, watchOS 3, macOS 10.11, *);
+  (void)__builtin_available(iOSApplicationExtension 8, tvOSApplicationExtension 10,
+                   watchOSApplicationExtension 3, macOSApplicationExtension 10.11, *);
+}
+
 #if __has_builtin(__builtin_available)
 #error expected
 // expected-error@-1 {{expected}}
diff --git a/test/Preprocessor/predefined-arch-macros.c b/test/Preprocessor/predefined-arch-macros.c
index a0eb8cbcca9a..08f4d2573f40 100644
--- a/test/Preprocessor/predefined-arch-macros.c
+++ b/test/Preprocessor/predefined-arch-macros.c
@@ -1601,6 +1601,7 @@
 // CHECK_BDVER1_M32: #define __AES__ 1
 // CHECK_BDVER1_M32: #define __AVX__ 1
 // CHECK_BDVER1_M32: #define __FMA4__ 1
+// CHECK_BDVER1_M32: #define __LWP__ 1
 // CHECK_BDVER1_M32: #define __LZCNT__ 1
 // CHECK_BDVER1_M32: #define __MMX__ 1
 // CHECK_BDVER1_M32: #define __PCLMUL__ 1
@@ -1630,6 +1631,7 @@
 // CHECK_BDVER1_M64: #define __AES__ 1
 // CHECK_BDVER1_M64: #define __AVX__ 1
 // CHECK_BDVER1_M64: #define __FMA4__ 1
+// CHECK_BDVER1_M64: #define __LWP__ 1
 // CHECK_BDVER1_M64: #define __LZCNT__ 1
 // CHECK_BDVER1_M64: #define __MMX__ 1
 // CHECK_BDVER1_M64: #define __PCLMUL__ 1
@@ -1664,6 +1666,7 @@
 // CHECK_BDVER2_M32: #define __F16C__ 1
 // CHECK_BDVER2_M32: #define __FMA4__ 1
 // CHECK_BDVER2_M32: #define __FMA__ 1
+// CHECK_BDVER2_M32: #define __LWP__ 1
 // CHECK_BDVER2_M32: #define __LZCNT__ 1
 // CHECK_BDVER2_M32: #define __MMX__ 1
 // CHECK_BDVER2_M32: #define __PCLMUL__ 1
@@ -1697,6 +1700,7 @@
 // CHECK_BDVER2_M64: #define __F16C__ 1
 // CHECK_BDVER2_M64: #define __FMA4__ 1
 // CHECK_BDVER2_M64: #define __FMA__ 1
+// CHECK_BDVER2_M64: #define __LWP__ 1
 // CHECK_BDVER2_M64: #define __LZCNT__ 1
 // CHECK_BDVER2_M64: #define __MMX__ 1
 // CHECK_BDVER2_M64: #define __PCLMUL__ 1
@@ -1733,6 +1737,7 @@
 // CHECK_BDVER3_M32: #define __FMA4__ 1
 // CHECK_BDVER3_M32: #define __FMA__ 1
 // CHECK_BDVER3_M32: #define __FSGSBASE__ 1
+// CHECK_BDVER3_M32: #define __LWP__ 1
 // CHECK_BDVER3_M32: #define __LZCNT__ 1
 // CHECK_BDVER3_M32: #define __MMX__ 1
 // CHECK_BDVER3_M32: #define __PCLMUL__ 1
@@ -1768,6 +1773,7 @@
 // CHECK_BDVER3_M64: #define __FMA4__ 1
 // CHECK_BDVER3_M64: #define __FMA__ 1
 // CHECK_BDVER3_M64: #define __FSGSBASE__ 1
+// CHECK_BDVER3_M64: #define __LWP__ 1
 // CHECK_BDVER3_M64: #define __LZCNT__ 1
 // CHECK_BDVER3_M64: #define __MMX__ 1
 // CHECK_BDVER3_M64: #define __PCLMUL__ 1
@@ -1807,6 +1813,7 @@
 // CHECK_BDVER4_M32: #define __FMA4__ 1
 // CHECK_BDVER4_M32: #define __FMA__ 1
 // CHECK_BDVER4_M32: #define __FSGSBASE__ 1
+// CHECK_BDVER4_M32: #define __LWP__ 1
 // CHECK_BDVER4_M32: #define __LZCNT__ 1
 // CHECK_BDVER4_M32: #define __MMX__ 1
 // CHECK_BDVER4_M32: #define __PCLMUL__ 1
@@ -1843,6 +1850,7 @@
 // CHECK_BDVER4_M64: #define __FMA4__ 1
 // CHECK_BDVER4_M64: #define __FMA__ 1
 // CHECK_BDVER4_M64: #define __FSGSBASE__ 1
+// CHECK_BDVER4_M64: #define __LWP__ 1
 // CHECK_BDVER4_M64: #define __LZCNT__ 1
 // CHECK_BDVER4_M64: #define __MMX__ 1
 // CHECK_BDVER4_M64: #define __PCLMUL__ 1
diff --git a/test/Preprocessor/x86_target_features.c b/test/Preprocessor/x86_target_features.c
index a201900ba762..ce3835f91f42 100644
--- a/test/Preprocessor/x86_target_features.c
+++ b/test/Preprocessor/x86_target_features.c
@@ -272,6 +272,14 @@
 // AESNOSSE2-NOT: #define __SSE2__ 1
 // AESNOSSE2-NOT: #define __SSE3__ 1
 
+// RUN: %clang -target i386-unknown-unknown -march=pentiumpro -mlwp -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=LWP %s
+
+// LWP: #define __LWP__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=bdver1 -mno-lwp -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOLWP %s
+
+// NOLWP-NOT: #define __LWP__ 1
+
 // RUN: %clang -target i386-unknown-unknown -march=pentiumpro -msha -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=SHA %s
 
 // SHA: #define __SHA__ 1
diff --git a/test/Sema/overloadable.c b/test/Sema/overloadable.c
index f5e17d211910..49d8085651d4 100644
--- a/test/Sema/overloadable.c
+++ b/test/Sema/overloadable.c
@@ -151,3 +151,18 @@ void dropping_qualifiers_is_incompatible() {
   foo(ccharbuf); // expected-error{{call to 'foo' is ambiguous}} expected-note@148{{candidate function}} expected-note@149{{candidate function}}
   foo(vcharbuf); // expected-error{{call to 'foo' is ambiguous}} expected-note@148{{candidate function}} expected-note@149{{candidate function}}
 }
+
+// Bug: we used to treat `__typeof__(foo)` as though it was `__typeof__(&foo)`
+// if `foo` was overloaded with only one function that could have its address
+// taken.
+void typeof_function_is_not_a_pointer() {
+  void not_a_pointer(void *) __attribute__((overloadable));
+  void not_a_pointer(char *__attribute__((pass_object_size(1))))
+    __attribute__((overloadable));
+
+  __typeof__(not_a_pointer) *fn;
+
+  void take_fn(void (*)(void *));
+  // if take_fn is passed a void (**)(void *), we'll get a warning.
+  take_fn(fn);
+}
diff --git a/test/Sema/redefinition-same-header.c b/test/Sema/redefinition-same-header.c
new file mode 100644
index 000000000000..be5bd1d71c92
--- /dev/null
+++ b/test/Sema/redefinition-same-header.c
@@ -0,0 +1,14 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: echo 'int yyy = 42;' > %t/a.h
+// RUN: %clang_cc1 -fsyntax-only %s -I%t  -verify
+
+// expected-error@a.h:1 {{redefinition of 'yyy'}}
+// expected-note@a.h:1 {{unguarded header; consider using #ifdef guards or #pragma once}}
+// expected-note-re@redefinition-same-header.c:11 {{'{{.*}}a.h' included multiple times, additional include site here}}
+// expected-note-re@redefinition-same-header.c:12 {{'{{.*}}a.h' included multiple times, additional include site here}}
+
+#include "a.h"
+#include "a.h"
+
+int foo() { return yyy; }
diff --git a/test/Sema/typo-correction.c b/test/Sema/typo-correction.c
index 59f022dfe528..78007015dcae 100644
--- a/test/Sema/typo-correction.c
+++ b/test/Sema/typo-correction.c
@@ -80,3 +80,10 @@ int h() {
   g(x, 5 ? z : 0); // expected-error 2 {{use of undeclared identifier}}
   (x, 5 ? z : 0);  // expected-error 2 {{use of undeclared identifier}}
 }
+
+__attribute__((overloadable)) void func_overloadable(int);
+__attribute__((overloadable)) void func_overloadable(float);
+
+void overloadable_callexpr(int arg) {
+	func_overloadable(ar); //expected-error{{use of undeclared identifier}}
+}
diff --git a/test/Sema/vector-cast.c b/test/Sema/vector-cast.c
index ea4acfac6a0b..cf23eb2742c4 100644
--- a/test/Sema/vector-cast.c
+++ b/test/Sema/vector-cast.c
@@ -53,9 +53,8 @@ void f4() {
   float2 f2;
   double d, a, b, c;
   float64x2_t v = {0.0, 1.0};
-  // FIXME: These diagnostics are inaccurate: should complain that 'double' to vector 'float2' involves truncation
-  f2 += d; // expected-error {{cannot convert between vector values of different size ('float2' (vector of 2 'float' values) and 'double')}}
-  d += f2; // expected-error {{cannot convert between vector values of different size}}
+  f2 += d; // expected-error {{cannot convert between scalar type 'double' and vector type 'float2' (vector of 2 'float' values) as implicit conversion would cause truncation}}
+  d += f2; // expected-error {{assigning to 'double' from incompatible type 'float2' (vector of 2 'float' values)}}
   a = 3.0 + vget_low_f64(v);
   b = vget_low_f64(v) + 3.0;
   c = vget_low_f64(v);
diff --git a/test/Sema/vector-gcc-compat.c b/test/Sema/vector-gcc-compat.c
new file mode 100644
index 000000000000..9eb0569b25f0
--- /dev/null
+++ b/test/Sema/vector-gcc-compat.c
@@ -0,0 +1,330 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only -Weverything -triple x86_64-apple-darwin10
+
+// Test the compatibility of clang's vector extensions with gcc's vector
+// extensions for C. Notably &&, ||, ?: and ! are not available.
+typedef long long v2i64 __attribute__((vector_size(16)));
+typedef int v2i32 __attribute__((vector_size(8)));
+typedef short v2i16 __attribute__((vector_size(4)));
+typedef char v2i8 __attribute__((vector_size(2)));
+
+typedef unsigned long long v2u64 __attribute__((vector_size(16)));
+typedef unsigned int v2u32 __attribute__((vector_size(8)));
+typedef unsigned short v2u16 __attribute__((vector_size(4)));
+typedef unsigned char v2u8 __attribute__((vector_size(2)));
+
+typedef float v4f32 __attribute__((vector_size(16)));
+typedef double v2f64 __attribute__((vector_size(16)));
+typedef double v4f64 __attribute__((vector_size(32)));
+typedef int v4i32 __attribute((vector_size(16)));
+
+void arithmeticTest(void);
+void logicTest(void);
+void comparisonTest(void);
+void floatTestSignedType(char a, short b, int c, long long d);
+void floatTestUnsignedType(unsigned char a, unsigned short b, unsigned int c,
+                           unsigned long long d);
+void floatTestConstant(void);
+void intTestType(char a, short b, int c, long long d);
+void intTestTypeUnsigned(unsigned char a, unsigned short b, unsigned int c,
+                         unsigned long long d);
+void uintTestType(char a, short b, int c, long long d);
+void uintTestTypeUnsigned(unsigned char a, unsigned short b, unsigned int c,
+                          unsigned long long d);
+void uintTestConstant(v2u64 v2u64_a, v2u32 v2u32_a, v2u16 v2u16_a, v2u8 v2u8_a);
+void intTestConstant(v2i64 v2i64_a, v2i32 v2i32_a, v2i16 v2i16_a, v2i8 v2i8_a);
+
+void arithmeticTest(void) {
+  v2i64 v2i64_a = (v2i64){0, 1};
+  v2i64 v2i64_r;
+
+  v2i64_r = v2i64_a + 1;
+  v2i64_r = v2i64_a - 1;
+  v2i64_r = v2i64_a * 1;
+  v2i64_r = v2i64_a / 1;
+  v2i64_r = v2i64_a % 1;
+
+  v2i64_r = 1 + v2i64_a;
+  v2i64_r = 1 - v2i64_a;
+  v2i64_r = 1 * v2i64_a;
+  v2i64_r = 1 / v2i64_a;
+  v2i64_r = 1 % v2i64_a;
+
+  v2i64_a += 1;
+  v2i64_a -= 1;
+  v2i64_a *= 1;
+  v2i64_a /= 1;
+  v2i64_a %= 1;
+}
+
+void comparisonTest(void) {
+  v2i64 v2i64_a = (v2i64){0, 1};
+  v2i64 v2i64_r;
+
+  v2i64_r = v2i64_a == 1;
+  v2i64_r = v2i64_a != 1;
+  v2i64_r = v2i64_a < 1;
+  v2i64_r = v2i64_a > 1;
+  v2i64_r = v2i64_a <= 1;
+  v2i64_r = v2i64_a >= 1;
+
+  v2i64_r = 1 == v2i64_a;
+  v2i64_r = 1 != v2i64_a;
+  v2i64_r = 1 < v2i64_a;
+  v2i64_r = 1 > v2i64_a;
+  v2i64_r = 1 <= v2i64_a;
+  v2i64_r = 1 >= v2i64_a;
+}
+
+void logicTest(void) {
+  v2i64 v2i64_a = (v2i64){0, 1};
+  v2i64 v2i64_b = (v2i64){2, 1};
+  v2i64 v2i64_c = (v2i64){3, 1};
+  v2i64 v2i64_r;
+
+  v2i64_r = !v2i64_a; // expected-error {{invalid argument type 'v2i64' (vector of 2 'long long' values) to unary expression}}
+  v2i64_r = ~v2i64_a;
+
+  v2i64_r = v2i64_a ? v2i64_b : v2i64_c; // expected-error {{used type 'v2i64' (vector of 2 'long long' values) where arithmetic or pointer type is required}}
+
+  v2i64_r = v2i64_a & 1;
+  v2i64_r = v2i64_a | 1;
+  v2i64_r = v2i64_a ^ 1;
+
+  v2i64_r = 1 & v2i64_a;
+  v2i64_r = 1 | v2i64_a;
+  v2i64_r = 1 ^ v2i64_a;
+
+  v2i64_a &= 1;
+  v2i64_a |= 1;
+  v2i64_a ^= 1;
+
+  v2i64_r = v2i64_a && 1; // expected-error {{logical expression with vector type 'v2i64' (vector of 2 'long long' values) and non-vector type 'int' is only supported in C++}}
+  v2i64_r = v2i64_a || 1; // expected-error {{logical expression with vector type 'v2i64' (vector of 2 'long long' values) and non-vector type 'int' is only supported in C++}}
+
+  v2i64_r = v2i64_a && v2i64_a; // expected-error {{logical expression with vector types 'v2i64' (vector of 2 'long long' values) and 'v2i64' is only supported in C++}}
+  v2i64_r = v2i64_a || v2i64_a; // expected-error {{logical expression with vector types 'v2i64' (vector of 2 'long long' values) and 'v2i64' is only supported in C++}}
+
+  v2i64_r = v2i64_a << 1;
+  v2i64_r = v2i64_a >> 1;
+
+  v2i64_r = 1 << v2i64_a;
+  v2i64_r = 1 >> v2i64_a;
+
+  v2i64_a <<= 1;
+  v2i64_a >>= 1;
+}
+
+// For operations with floating point types, we check that integer constants
+// can be represented, or failing that, check based on the integer types.
+void floatTestConstant(void) {
+  // Test that constants added to floats must be expressible as floating point
+  // numbers.
+  v4f32 v4f32_a = {0.4f, 0.4f, 0.4f, 0.4f};
+  v4f32_a = v4f32_a + 1;
+  v4f32_a = v4f32_a + 0xFFFFFF;
+  v4f32_a = v4f32_a + (-1567563LL);
+  v4f32_a = v4f32_a + (16777208);
+  v4f32_a = v4f32_a + (16777219); // expected-error {{cannot convert between scalar type 'int' and vector type 'v4f32' (vector of 4 'float' values) as implicit conversion would cause truncation}}
+}
+
+void floatTestConstantComparison(void);
+void doubleTestConstantComparison(void);
+
+void floatTestConstantComparison(void) {
+  v4f32 v4f32_a = {0.4f, 0.4f, 0.4f, 0.4f};
+  v4i32 v4i32_r;
+  v4i32_r = v4f32_a > 0.4f;
+  v4i32_r = v4f32_a >= 0.4f;
+  v4i32_r = v4f32_a < 0.4f;
+  v4i32_r = v4f32_a <= 0.4f;
+  v4i32_r = v4f32_a == 0.4f; // expected-warning {{comparing floating point with == or != is unsafe}}
+  v4i32_r = v4f32_a != 0.4f; // expected-warning {{comparing floating point with == or != is unsafe}}
+}
+
+void doubleTestConstantComparison(void) {
+  v2f64 v2f64_a = {0.4, 0.4};
+  v2i64 v2i64_r;
+  v2i64_r = v2f64_a > 0.4;
+  v2i64_r = v2f64_a >= 0.4;
+  v2i64_r = v2f64_a < 0.4;
+  v2i64_r = v2f64_a <= 0.4;
+  v2i64_r = v2f64_a == 0.4; // expected-warning {{comparing floating point with == or != is unsafe}}
+  v2i64_r = v2f64_a != 0.4; // expected-warning {{comparing floating point with == or != is unsafe}}
+}
+
+void floatTestUnsignedType(unsigned char a, unsigned short b, unsigned int c,
+                           unsigned long long d) {
+  v4f32 v4f32_a = {0.4f, 0.4f, 0.4f, 0.4f};
+  v4f64 v4f64_b = {0.4, 0.4, 0.4, 0.4};
+
+  v4f32_a = v4f32_a + a;
+  v4f32_a = v4f32_a + b;
+  v4f32_a = v4f32_a + c; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v4f32' (vector of 4 'float' values) as implicit conversion would cause truncation}}
+  v4f32_a = v4f32_a + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v4f32' (vector of 4 'float' values) as implicit conversion would cause truncation}}
+
+  v4f64_b = v4f64_b + a;
+  v4f64_b = v4f64_b + b;
+  v4f64_b = v4f64_b + c;
+  v4f64_b = v4f64_b + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v4f64' (vector of 4 'double' values) as implicit conversion would cause truncation}}
+}
+
+void floatTestSignedType(char a, short b, int c, long long d) {
+  v4f32 v4f32_a = {0.4f, 0.4f, 0.4f, 0.4f};
+  v4f64 v4f64_b = {0.4, 0.4, 0.4, 0.4};
+
+  v4f32_a = v4f32_a + a;
+  v4f32_a = v4f32_a + b;
+  v4f32_a = v4f32_a + c; // expected-error {{cannot convert between scalar type 'int' and vector type 'v4f32' (vector of 4 'float' values) as implicit conversion would cause truncation}}
+  v4f32_a = v4f32_a + d; // expected-error {{cannot convert between scalar type 'long long' and vector type 'v4f32' (vector of 4 'float' values) as implicit conversion would cause truncation}}
+
+  v4f64_b = v4f64_b + a;
+  v4f64_b = v4f64_b + b;
+  v4f64_b = v4f64_b + c;
+  v4f64_b = v4f64_b + d; // expected-error {{cannot convert between scalar type 'long long' and vector type 'v4f64' (vector of 4 'double' values) as implicit conversion would cause truncation}}
+}
+
+void intTestType(char a, short b, int c, long long d) {
+  v2i64 v2i64_a = {1, 2};
+  v2i32 v2i32_a = {1, 2};
+  v2i16 v2i16_a = {1, 2};
+  v2i8 v2i8_a = {1, 2};
+
+  v2i64_a = v2i64_a + d;
+  v2i64_a = v2i64_a + c;
+  v2i64_a = v2i64_a + b;
+  v2i64_a = v2i64_a + a;
+
+  v2i32_a = v2i32_a + d; // expected-warning {{implicit conversion loses integer precision: 'long long' to 'v2i32' (vector of 2 'int' values)}}
+  v2i32_a = v2i32_a + c;
+  v2i32_a = v2i32_a + b;
+  v2i32_a = v2i32_a + a;
+
+  v2i16_a = v2i16_a + d; // expected-error {{cannot convert between scalar type 'long long' and vector type 'v2i16' (vector of 2 'short' values) as implicit conversion would cause truncation}}
+  v2i16_a = v2i16_a + c; // expected-warning {{implicit conversion loses integer precision: 'int' to 'v2i16' (vector of 2 'short' values)}}
+  v2i16_a = v2i16_a + b;
+  v2i16_a = v2i16_a + a;
+
+  v2i8_a = v2i8_a + d; // expected-error {{cannot convert between scalar type 'long long' and vector type 'v2i8' (vector of 2 'char' values) as implicit conversion would cause truncation}}
+  v2i8_a = v2i8_a + c; // expected-error {{cannot convert between scalar type 'int' and vector type 'v2i8' (vector of 2 'char' values) as implicit conversion would cause truncation}}
+  v2i8_a = v2i8_a + b; // expected-warning {{implicit conversion loses integer precision: 'short' to 'v2i8' (vector of 2 'char' values)}}
+  v2i8_a = v2i8_a + a;
+}
+
+void intTestTypeUnsigned(unsigned char a, unsigned short b, unsigned int c,
+                         unsigned long long d) {
+  v2i64 v2i64_a = {1, 2};
+  v2i32 v2i32_a = {1, 2};
+  v2i16 v2i16_a = {1, 2};
+  v2i8 v2i8_a = {1, 2};
+
+  v2i64_a = v2i64_a + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v2i64' (vector of 2 'long long' values) as implicit conversion would cause truncation}}
+
+  v2i64_a = v2i64_a + c;
+  v2i64_a = v2i64_a + b;
+  v2i64_a = v2i64_a + a;
+
+  v2i32_a = v2i32_a + d; // expected-warning {{implicit conversion loses integer precision: 'unsigned long long' to 'v2i32' (vector of 2 'int' values)}}
+  v2i32_a = v2i32_a + c; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2i32' (vector of 2 'int' values) as implicit conversion would cause truncation}}
+  v2i32_a = v2i32_a + b;
+  v2i32_a = v2i32_a + a;
+
+  v2i16_a = v2i16_a + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v2i16' (vector of 2 'short' values) as implicit conversion would cause truncation}}
+  v2i16_a = v2i16_a + c; // expected-warning {{implicit conversion loses integer precision: 'unsigned int' to 'v2i16' (vector of 2 'short' values)}}
+  v2i16_a = v2i16_a + b; // expected-error {{cannot convert between scalar type 'unsigned short' and vector type 'v2i16' (vector of 2 'short' values) as implicit conversion would cause truncation}}
+  v2i16_a = v2i16_a + a;
+
+  v2i8_a = v2i8_a + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v2i8' (vector of 2 'char' values) as implicit conversion would cause truncation}}
+  v2i8_a = v2i8_a + c; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2i8' (vector of 2 'char' values) as implicit conversion would cause truncation}}
+  v2i8_a = v2i8_a + b; // expected-warning {{implicit conversion loses integer precision: 'unsigned short' to 'v2i8' (vector of 2 'char' values)}}
+  v2i8_a = v2i8_a + a; // expected-error {{cannot convert between scalar type 'unsigned char' and vector type 'v2i8' (vector of 2 'char' values) as implicit conversion would cause truncation}}
+}
+
+void uintTestType(char a, short b, int c, long long d) {
+  v2u64 v2u64_a = {1, 2};
+  v2u32 v2u32_a = {1, 2};
+  v2u16 v2u16_a = {1, 2};
+  v2u8 v2u8_a = {1, 2};
+
+  v2u64_a = v2u64_a + d; // expected-warning {{implicit conversion changes signedness: 'long long' to 'v2u64' (vector of 2 'unsigned long long' values)}}
+  v2u64_a = v2u64_a + c; // expected-warning {{implicit conversion changes signedness: 'int' to 'v2u64' (vector of 2 'unsigned long long' values)}}
+  v2u64_a = v2u64_a + b; // expected-warning {{implicit conversion changes signedness: 'short' to 'v2u64' (vector of 2 'unsigned long long' values)}}
+  v2u64_a = v2u64_a + a; // expected-warning {{implicit conversion changes signedness: 'char' to 'v2u64' (vector of 2 'unsigned long long' values)}}
+
+  v2u32_a = v2u32_a + d; // expected-warning {{implicit conversion loses integer precision: 'long long' to 'v2u32' (vector of 2 'unsigned int' values)}}
+  v2u32_a = v2u32_a + c; // expected-warning {{implicit conversion changes signedness: 'int' to 'v2u32' (vector of 2 'unsigned int' values)}}
+  v2u32_a = v2u32_a + b; // expected-warning {{implicit conversion changes signedness: 'short' to 'v2u32' (vector of 2 'unsigned int' values)}}
+  v2u32_a = v2u32_a + a; // expected-warning {{implicit conversion changes signedness: 'char' to 'v2u32' (vector of 2 'unsigned int' values)}}
+
+  v2u16_a = v2u16_a + d; // expected-error {{cannot convert between scalar type 'long long' and vector type 'v2u16' (vector of 2 'unsigned short' values) as implicit conversion would cause truncation}}
+  v2u16_a = v2u16_a + c; // expected-warning {{implicit conversion loses integer precision: 'int' to 'v2u16' (vector of 2 'unsigned short' values)}}
+  v2u16_a = v2u16_a + b; // expected-warning {{implicit conversion changes signedness: 'short' to 'v2u16' (vector of 2 'unsigned short' values)}}
+  v2u16_a = v2u16_a + a; // expected-warning {{implicit conversion changes signedness: 'char' to 'v2u16' (vector of 2 'unsigned short' values)}}
+
+  v2u8_a = v2u8_a + d; // expected-error {{cannot convert between scalar type 'long long' and vector type 'v2u8' (vector of 2 'unsigned char' values) as implicit conversion would cause truncation}}
+  v2u8_a = v2u8_a + c; // expected-error {{cannot convert between scalar type 'int' and vector type 'v2u8' (vector of 2 'unsigned char' values) as implicit conversion would cause truncation}}
+  v2u8_a = v2u8_a + b; // expected-warning {{implicit conversion loses integer precision: 'short' to 'v2u8' (vector of 2 'unsigned char' values)}}
+  v2u8_a = v2u8_a + a; // expected-warning {{implicit conversion changes signedness: 'char' to 'v2u8' (vector of 2 'unsigned char' values)}}
+}
+
+void uintTestTypeUnsigned(unsigned char a, unsigned short b, unsigned int c,
+                          unsigned long long d) {
+  v2u64 v2u64_a = {1, 2};
+  v2u32 v2u32_a = {1, 2};
+  v2u16 v2u16_a = {1, 2};
+  v2u8 v2u8_a = {1, 2};
+
+  v2u64_a = v2u64_a + d;
+  v2u64_a = v2u64_a + c;
+  v2u64_a = v2u64_a + b;
+  v2u64_a = v2u64_a + a;
+
+  v2u32_a = v2u32_a + d; // expected-warning {{implicit conversion loses integer precision: 'unsigned long long' to 'v2u32' (vector of 2 'unsigned int' values)}}
+  v2u32_a = v2u32_a + c;
+  v2u32_a = v2u32_a + b;
+  v2u32_a = v2u32_a + a;
+
+  v2u16_a = v2u16_a + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v2u16' (vector of 2 'unsigned short' values) as implicit conversion would cause truncation}}
+  v2u16_a = v2u16_a + c; // expected-warning {{implicit conversion loses integer precision: 'unsigned int' to 'v2u16' (vector of 2 'unsigned short' values)}}
+  v2u16_a = v2u16_a + b;
+  v2u16_a = v2u16_a + a;
+
+  v2u8_a = v2u8_a + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v2u8' (vector of 2 'unsigned char' values) as implicit conversion would cause truncation}}
+  v2u8_a = v2u8_a + c; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2u8' (vector of 2 'unsigned char' values) as implicit conversion would cause truncation}}
+  v2u8_a = v2u8_a + b; // expected-warning {{implicit conversion loses integer precision: 'unsigned short' to 'v2u8' (vector of 2 'unsigned char' values)}}
+  v2u8_a = v2u8_a + a;
+}
+
+void uintTestConstant(v2u64 v2u64_a, v2u32 v2u32_a, v2u16 v2u16_a,
+                      v2u8 v2u8_a) {
+  v2u64_a = v2u64_a + 0xFFFFFFFFFFFFFFFF;
+  v2u32_a = v2u32_a + 0xFFFFFFFF;
+  v2u16_a = v2u16_a + 0xFFFF;
+  v2u8_a = v2u8_a + 0xFF;
+
+  v2u32_a = v2u32_a + 0x1FFFFFFFF; // expected-warning {{implicit conversion from 'long' to 'v2u32' (vector of 2 'unsigned int' values) changes value from 8589934591 to 4294967295}}
+  v2u16_a = v2u16_a + 0x1FFFF;     // expected-warning {{implicit conversion from 'int' to 'v2u16' (vector of 2 'unsigned short' values) changes value from 131071 to 65535}}
+  v2u8_a = v2u8_a + 0x1FF;         // expected-error {{cannot convert between scalar type 'int' and vector type 'v2u8' (vector of 2 'unsigned char' values) as implicit conversion would cause truncation}}
+}
+
+void intTestConstant(v2i64 v2i64_a, v2i32 v2i32_a, v2i16 v2i16_a, v2i8 v2i8_a) {
+  // Legal upper bounds.
+  v2i64_a = v2i64_a + (long long)0x7FFFFFFFFFFFFFFF;
+  v2i32_a = v2i32_a + (int)0x7FFFFFFF;
+  v2i16_a = v2i16_a + (short)0x7FFF;
+  v2i8_a = v2i8_a + (char)0x7F;
+
+  // Legal lower bounds.
+  v2i64_a = v2i64_a + (-9223372036854775807);
+  v2i32_a = v2i32_a + (-2147483648);
+  v2i16_a = v2i16_a + (-32768);
+  v2i8_a = v2i8_a + (-128);
+
+  // One increment/decrement more than the type can hold
+  v2i32_a = v2i32_a + 2147483648; // expected-warning {{implicit conversion from 'long' to 'v2i32' (vector of 2 'int' values) changes value from 2147483648 to -2147483648}}
+  v2i16_a = v2i16_a + 32768;      // expected-warning {{implicit conversion from 'int' to 'v2i16' (vector of 2 'short' values) changes value from 32768 to -32768}}
+  v2i8_a = v2i8_a + 128;          // expected-warning {{implicit conversion from 'int' to 'v2i8' (vector of 2 'char' values) changes value from 128 to -128}}
+
+  v2i32_a = v2i32_a + (-2147483649); // expected-warning {{implicit conversion from 'long' to 'v2i32' (vector of 2 'int' values) changes value from -2147483649 to 2147483647}}
+  v2i16_a = v2i16_a + (-32769);      // expected-warning {{implicit conversion from 'int' to 'v2i16' (vector of 2 'short' values) changes value from -32769 to 32767}}
+  v2i8_a = v2i8_a + (-129);          // expected-error {{cannot convert between scalar type 'int' and vector type 'v2i8' (vector of 2 'char' values) as implicit conversion would cause truncation}}
+}
diff --git a/test/Sema/vector-gcc-compat.cpp b/test/Sema/vector-gcc-compat.cpp
new file mode 100644
index 000000000000..12da314c325f
--- /dev/null
+++ b/test/Sema/vector-gcc-compat.cpp
@@ -0,0 +1,328 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only -Weverything -std=c++11 -triple x86_64-apple-darwin10
+
+// Test the compatibility of clang++'s vector extensions with g++'s vector
+// extensions. In comparison to the extensions available in C, the !, ?:, && and
+// || operators work on vector types.
+
+typedef long long v2i64 __attribute__((vector_size(16))); // expected-warning {{'long long' is incompatible with C++98}}
+typedef int v2i32 __attribute__((vector_size(8)));
+typedef short v2i16 __attribute__((vector_size(4)));
+typedef char v2i8 __attribute__((vector_size(2)));
+
+typedef unsigned long long v2u64 __attribute__((vector_size(16))); // expected-warning {{'long long' is incompatible with C++98}}
+typedef unsigned int v2u32 __attribute__((vector_size(8)));
+typedef unsigned short v2u16 __attribute__((vector_size(4)));
+typedef unsigned char v2u8 __attribute__((vector_size(2)));
+
+typedef float v4f32 __attribute__((vector_size(16)));
+typedef double v2f64 __attribute__((vector_size(16)));
+typedef double v4f64 __attribute__((vector_size(32)));
+typedef int v4i32 __attribute((vector_size(16)));
+
+void arithmeticTest(void);
+void logicTest(void);
+void comparisonTest(void);
+void floatTestSignedType(char a, short b, int c, long long d); // expected-warning {{'long long' is incompatible with C++98}}
+void floatTestUnsignedType(unsigned char a, unsigned short b, unsigned int c,
+                           unsigned long long d); // expected-warning {{'long long' is incompatible with C++98}}
+void floatTestConstant(void);
+void intTestType(char a, short b, int c, long long d); // expected-warning {{'long long' is incompatible with C++98}}
+void intTestTypeUnsigned(unsigned char a, unsigned short b, unsigned int c,
+                         unsigned long long d); // expected-warning {{'long long' is incompatible with C++98}}
+void uintTestType(char a, short b, int c, long long d); // expected-warning {{'long long' is incompatible with C++98}}
+void uintTestTypeUnsigned(unsigned char a, unsigned short b, unsigned int c,
+                          unsigned long long d); // expected-warning {{'long long' is incompatible with C++98}}
+void uintTestConstant(v2u64 v2u64_a, v2u32 v2u32_a, v2u16 v2u16_a, v2u8 v2u8_a);
+void intTestConstant(v2i64 v2i64_a, v2i32 v2i32_a, v2i16 v2i16_a, v2i8 v2i8_a);
+
+void arithmeticTest(void) {
+  v2i64 v2i64_a = (v2i64){0, 1}; // expected-warning {{compound literals are a C99-specific feature}}
+  v2i64 v2i64_r;
+
+  v2i64_r = v2i64_a + 1;
+  v2i64_r = v2i64_a - 1;
+  v2i64_r = v2i64_a * 1;
+  v2i64_r = v2i64_a / 1;
+  v2i64_r = v2i64_a % 1;
+
+  v2i64_r = 1 + v2i64_a;
+  v2i64_r = 1 - v2i64_a;
+  v2i64_r = 1 * v2i64_a;
+  v2i64_r = 1 / v2i64_a;
+  v2i64_r = 1 % v2i64_a;
+
+  v2i64_a += 1;
+  v2i64_a -= 1;
+  v2i64_a *= 1;
+  v2i64_a /= 1;
+  v2i64_a %= 1;
+}
+
+void comparisonTest(void) {
+  v2i64 v2i64_a = (v2i64){0, 1}; // expected-warning {{compound literals are a C99-specific feature}}
+  v2i64 v2i64_r;
+
+  v2i64_r = v2i64_a == 1;
+  v2i64_r = v2i64_a != 1;
+  v2i64_r = v2i64_a < 1;
+  v2i64_r = v2i64_a > 1;
+  v2i64_r = v2i64_a <= 1;
+  v2i64_r = v2i64_a >= 1;
+
+  v2i64_r = 1 == v2i64_a;
+  v2i64_r = 1 != v2i64_a;
+  v2i64_r = 1 < v2i64_a;
+  v2i64_r = 1 > v2i64_a;
+  v2i64_r = 1 <= v2i64_a;
+  v2i64_r = 1 >= v2i64_a;
+}
+
+void logicTest(void) {
+  v2i64 v2i64_a = (v2i64){0, 1}; // expected-warning {{compound literals are a C99-specific feature}}
+  v2i64 v2i64_b = (v2i64){2, 1}; // expected-warning {{compound literals are a C99-specific feature}}
+  v2i64 v2i64_c = (v2i64){3, 1}; // expected-warning {{compound literals are a C99-specific feature}}
+  v2i64 v2i64_r;
+
+  v2i64_r = !v2i64_a;  // expected-error {{invalid argument type 'v2i64' (vector of 2 'long long' values) to unary expression}}
+  v2i64_r = ~v2i64_a;
+
+  v2i64_r = v2i64_a ? v2i64_b : v2i64_c; // expected-error {{value of type 'v2i64' (vector of 2 'long long' values) is not contextually convertible to 'bool'}}
+
+  v2i64_r = v2i64_a & 1;
+  v2i64_r = v2i64_a | 1;
+  v2i64_r = v2i64_a ^ 1;
+
+  v2i64_r = 1 & v2i64_a;
+  v2i64_r = 1 | v2i64_a;
+  v2i64_r = 1 ^ v2i64_a;
+  v2i64_a &= 1;
+  v2i64_a |= 1;
+  v2i64_a ^= 1;
+
+  v2i64_r = v2i64_a && 1;
+  v2i64_r = v2i64_a || 1;
+
+  v2i64_r = v2i64_a << 1;
+  v2i64_r = v2i64_a >> 1;
+
+  v2i64_r = 1 << v2i64_a;
+  v2i64_r = 1 >> v2i64_a;
+
+  v2i64_a <<= 1;
+  v2i64_a >>= 1;
+}
+
+// For operations with floating point types, we check that integer constants
+// can be represented, or failing that, check based on the integer types.
+void floatTestConstant(void) {
+  // Test that constants added to floats must be expressible as floating point
+  // numbers.
+  v4f32 v4f32_a = {0.4f, 0.4f, 0.4f, 0.4f};
+  v4f32_a = v4f32_a + 1;
+  v4f32_a = v4f32_a + 0xFFFFFF;
+  v4f32_a = v4f32_a + (-1567563LL); // expected-warning {{'long long' is incompatible with C++98}}
+  v4f32_a = v4f32_a + (16777208);
+  v4f32_a = v4f32_a + (16777219); // expected-error {{cannot convert between scalar type 'int' and vector type 'v4f32' (vector of 4 'float' values) as implicit conversion would cause truncation}}
+}
+
+void floatTestConstantComparison(void);
+void doubleTestConstantComparison(void);
+
+void floatTestConstantComparison(void) {
+  v4f32 v4f32_a = {0.4f, 0.4f, 0.4f, 0.4f};
+  v4i32 v4i32_r;
+  v4i32_r = v4f32_a > 0.4f;
+  v4i32_r = v4f32_a >= 0.4f;
+  v4i32_r = v4f32_a < 0.4f;
+  v4i32_r = v4f32_a <= 0.4f;
+  v4i32_r = v4f32_a == 0.4f; // expected-warning {{comparing floating point with == or != is unsafe}}
+  v4i32_r = v4f32_a != 0.4f; // expected-warning {{comparing floating point with == or != is unsafe}}
+}
+
+void doubleTestConstantComparison(void) {
+  v2f64 v2f64_a = {0.4, 0.4};
+  v2i64 v2i64_r;
+  v2i64_r = v2f64_a > 0.4;
+  v2i64_r = v2f64_a >= 0.4;
+  v2i64_r = v2f64_a < 0.4;
+  v2i64_r = v2f64_a <= 0.4;
+  v2i64_r = v2f64_a == 0.4; // expected-warning {{comparing floating point with == or != is unsafe}}
+  v2i64_r = v2f64_a != 0.4; // expected-warning {{comparing floating point with == or != is unsafe}}
+}
+
+void floatTestUnsignedType(unsigned char a, unsigned short b, unsigned int c,
+                           unsigned long long d) { // expected-warning {{'long long' is incompatible with C++98}}
+  v4f32 v4f32_a = {0.4f, 0.4f, 0.4f, 0.4f};
+  v4f64 v4f64_b = {0.4, 0.4, 0.4, 0.4};
+
+  v4f32_a = v4f32_a + a;
+  v4f32_a = v4f32_a + b;
+  v4f32_a = v4f32_a + c; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v4f32' (vector of 4 'float' values) as implicit conversion would cause truncation}}
+  v4f32_a = v4f32_a + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v4f32' (vector of 4 'float' values) as implicit conversion would cause truncation}}
+
+  v4f64_b = v4f64_b + a;
+  v4f64_b = v4f64_b + b;
+  v4f64_b = v4f64_b + c;
+  v4f64_b = v4f64_b + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v4f64' (vector of 4 'double' values) as implicit conversion would cause truncation}}
+}
+
+void floatTestSignedType(char a, short b, int c, long long d) { // expected-warning {{'long long' is incompatible with C++98}}
+  v4f32 v4f32_a = {0.4f, 0.4f, 0.4f, 0.4f};
+  v4f64 v4f64_b = {0.4, 0.4, 0.4, 0.4};
+
+  v4f32_a = v4f32_a + a;
+  v4f32_a = v4f32_a + b;
+  v4f32_a = v4f32_a + c; // expected-error {{cannot convert between scalar type 'int' and vector type 'v4f32' (vector of 4 'float' values) as implicit conversion would cause truncation}}
+  v4f32_a = v4f32_a + d; // expected-error {{cannot convert between scalar type 'long long' and vector type 'v4f32' (vector of 4 'float' values) as implicit conversion would cause truncation}}
+
+  v4f64_b = v4f64_b + a;
+  v4f64_b = v4f64_b + b;
+  v4f64_b = v4f64_b + c;
+  v4f64_b = v4f64_b + d; // expected-error {{cannot convert between scalar type 'long long' and vector type 'v4f64' (vector of 4 'double' values) as implicit conversion would cause truncation}}
+}
+
+void intTestType(char a, short b, int c, long long d) { // expected-warning {{'long long' is incompatible with C++98}}
+  v2i64 v2i64_a = {1, 2};
+  v2i32 v2i32_a = {1, 2};
+  v2i16 v2i16_a = {1, 2};
+  v2i8 v2i8_a = {1, 2};
+
+  v2i64_a = v2i64_a + d;
+  v2i64_a = v2i64_a + c;
+  v2i64_a = v2i64_a + b;
+  v2i64_a = v2i64_a + a;
+
+  v2i32_a = v2i32_a + d; // expected-warning {{implicit conversion loses integer precision: 'long long' to 'v2i32' (vector of 2 'int' values)}}
+  v2i32_a = v2i32_a + c;
+  v2i32_a = v2i32_a + b;
+  v2i32_a = v2i32_a + a;
+
+  v2i16_a = v2i16_a + d; // expected-error {{cannot convert between scalar type 'long long' and vector type 'v2i16' (vector of 2 'short' values) as implicit conversion would cause truncation}}
+  v2i16_a = v2i16_a + c; // expected-warning {{implicit conversion loses integer precision: 'int' to 'v2i16' (vector of 2 'short' values)}}
+  v2i16_a = v2i16_a + b;
+  v2i16_a = v2i16_a + a;
+
+  v2i8_a = v2i8_a + d; // expected-error {{cannot convert between scalar type 'long long' and vector type 'v2i8' (vector of 2 'char' values) as implicit conversion would cause truncation}}
+  v2i8_a = v2i8_a + c; // expected-error {{cannot convert between scalar type 'int' and vector type 'v2i8' (vector of 2 'char' values) as implicit conversion would cause truncation}}
+  v2i8_a = v2i8_a + b; // expected-warning {{implicit conversion loses integer precision: 'short' to 'v2i8' (vector of 2 'char' values)}}
+  v2i8_a = v2i8_a + a;
+}
+
+void intTestTypeUnsigned(unsigned char a, unsigned short b, unsigned int c,
+                         unsigned long long d) { // expected-warning {{'long long' is incompatible with C++98}}
+  v2i64 v2i64_a = {1, 2};
+  v2i32 v2i32_a = {1, 2};
+  v2i16 v2i16_a = {1, 2};
+  v2i8 v2i8_a = {1, 2};
+
+  v2i64_a = v2i64_a + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v2i64' (vector of 2 'long long' values) as implicit conversion would cause truncation}}
+
+  v2i64_a = v2i64_a + c;
+  v2i64_a = v2i64_a + b;
+  v2i64_a = v2i64_a + a;
+
+  v2i32_a = v2i32_a + d; // expected-warning {{implicit conversion loses integer precision: 'unsigned long long' to 'v2i32' (vector of 2 'int' values)}}
+  v2i32_a = v2i32_a + c; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2i32' (vector of 2 'int' values) as implicit conversion would cause truncation}}
+  v2i32_a = v2i32_a + b;
+  v2i32_a = v2i32_a + a;
+
+  v2i16_a = v2i16_a + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v2i16' (vector of 2 'short' values) as implicit conversion would cause truncation}}
+  v2i16_a = v2i16_a + c; // expected-warning {{implicit conversion loses integer precision: 'unsigned int' to 'v2i16' (vector of 2 'short' values)}}
+  v2i16_a = v2i16_a + b; // expected-error {{cannot convert between scalar type 'unsigned short' and vector type 'v2i16' (vector of 2 'short' values) as implicit conversion would cause truncation}}
+  v2i16_a = v2i16_a + a;
+
+  v2i8_a = v2i8_a + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v2i8' (vector of 2 'char' values) as implicit conversion would cause truncation}}
+  v2i8_a = v2i8_a + c; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2i8' (vector of 2 'char' values) as implicit conversion would cause truncation}}
+  v2i8_a = v2i8_a + b; // expected-warning {{implicit conversion loses integer precision: 'unsigned short' to 'v2i8' (vector of 2 'char' values)}}
+  v2i8_a = v2i8_a + a; // expected-error {{cannot convert between scalar type 'unsigned char' and vector type 'v2i8' (vector of 2 'char' values) as implicit conversion would cause truncation}}
+}
+
+void uintTestType(char a, short b, int c, long long d) { // expected-warning {{'long long' is incompatible with C++98}}
+  v2u64 v2u64_a = {1, 2}; // v2u64 cases: each signed scalar only draws a signedness warning.
+  v2u32 v2u32_a = {1, 2}; // v2u32 cases: 'long long' loses precision; the rest change signedness.
+  v2u16 v2u16_a = {1, 2}; // v2u16 cases: 'long long' is an error, 'int' loses precision.
+  v2u8 v2u8_a = {1, 2}; // v2u8 cases: 'long long' and 'int' are errors, 'short' loses precision.
+
+  v2u64_a = v2u64_a + d; // expected-warning {{implicit conversion changes signedness: 'long long' to 'v2u64' (vector of 2 'unsigned long long' values)}}
+  v2u64_a = v2u64_a + c; // expected-warning {{implicit conversion changes signedness: 'int' to 'v2u64' (vector of 2 'unsigned long long' values)}}
+  v2u64_a = v2u64_a + b; // expected-warning {{implicit conversion changes signedness: 'short' to 'v2u64' (vector of 2 'unsigned long long' values)}}
+  v2u64_a = v2u64_a + a; // expected-warning {{implicit conversion changes signedness: 'char' to 'v2u64' (vector of 2 'unsigned long long' values)}}
+
+  v2u32_a = v2u32_a + d; // expected-warning {{implicit conversion loses integer precision: 'long long' to 'v2u32' (vector of 2 'unsigned int' values)}}
+  v2u32_a = v2u32_a + c; // expected-warning {{implicit conversion changes signedness: 'int' to 'v2u32' (vector of 2 'unsigned int' values)}}
+  v2u32_a = v2u32_a + b; // expected-warning {{implicit conversion changes signedness: 'short' to 'v2u32' (vector of 2 'unsigned int' values)}}
+  v2u32_a = v2u32_a + a; // expected-warning {{implicit conversion changes signedness: 'char' to 'v2u32' (vector of 2 'unsigned int' values)}}
+
+  v2u16_a = v2u16_a + d; // expected-error {{cannot convert between scalar type 'long long' and vector type 'v2u16' (vector of 2 'unsigned short' values) as implicit conversion would cause truncation}}
+  v2u16_a = v2u16_a + c; // expected-warning {{implicit conversion loses integer precision: 'int' to 'v2u16' (vector of 2 'unsigned short' values)}}
+  v2u16_a = v2u16_a + b; // expected-warning {{implicit conversion changes signedness: 'short' to 'v2u16' (vector of 2 'unsigned short' values)}}
+  v2u16_a = v2u16_a + a; // expected-warning {{implicit conversion changes signedness: 'char' to 'v2u16' (vector of 2 'unsigned short' values)}}
+
+  v2u8_a = v2u8_a + d; // expected-error {{cannot convert between scalar type 'long long' and vector type 'v2u8' (vector of 2 'unsigned char' values) as implicit conversion would cause truncation}}
+  v2u8_a = v2u8_a + c; // expected-error {{cannot convert between scalar type 'int' and vector type 'v2u8' (vector of 2 'unsigned char' values) as implicit conversion would cause truncation}}
+  v2u8_a = v2u8_a + b; // expected-warning {{implicit conversion loses integer precision: 'short' to 'v2u8' (vector of 2 'unsigned char' values)}}
+  v2u8_a = v2u8_a + a; // expected-warning {{implicit conversion changes signedness: 'char' to 'v2u8' (vector of 2 'unsigned char' values)}}
+}
+
+void uintTestTypeUnsigned(unsigned char a, unsigned short b, unsigned int c, // Unsigned scalars: only width-related diagnostics, none about signedness.
+                          unsigned long long d) { // expected-warning {{'long long' is incompatible with C++98}}
+  v2u64 v2u64_a = {1, 2}; // v2u64 cases: no diagnostics for any of the four scalars.
+  v2u32 v2u32_a = {1, 2}; // v2u32 cases: only 'unsigned long long' is diagnosed (precision loss).
+  v2u16 v2u16_a = {1, 2}; // v2u16 cases: 'unsigned long long' is an error, 'unsigned int' loses precision.
+  v2u8 v2u8_a = {1, 2}; // v2u8 cases: 'unsigned long long' and 'unsigned int' are errors, 'unsigned short' loses precision.
+
+  v2u64_a = v2u64_a + d;
+  v2u64_a = v2u64_a + c;
+  v2u64_a = v2u64_a + b;
+  v2u64_a = v2u64_a + a;
+
+  v2u32_a = v2u32_a + d; // expected-warning {{implicit conversion loses integer precision: 'unsigned long long' to 'v2u32' (vector of 2 'unsigned int' values)}}
+  v2u32_a = v2u32_a + c;
+  v2u32_a = v2u32_a + b;
+  v2u32_a = v2u32_a + a;
+
+  v2u16_a = v2u16_a + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v2u16' (vector of 2 'unsigned short' values) as implicit conversion would cause truncation}}
+  v2u16_a = v2u16_a + c; // expected-warning {{implicit conversion loses integer precision: 'unsigned int' to 'v2u16' (vector of 2 'unsigned short' values)}}
+  v2u16_a = v2u16_a + b;
+  v2u16_a = v2u16_a + a;
+
+  v2u8_a = v2u8_a + d; // expected-error {{cannot convert between scalar type 'unsigned long long' and vector type 'v2u8' (vector of 2 'unsigned char' values) as implicit conversion would cause truncation}}
+  v2u8_a = v2u8_a + c; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2u8' (vector of 2 'unsigned char' values) as implicit conversion would cause truncation}}
+  v2u8_a = v2u8_a + b; // expected-warning {{implicit conversion loses integer precision: 'unsigned short' to 'v2u8' (vector of 2 'unsigned char' values)}}
+  v2u8_a = v2u8_a + a;
+}
+
+void uintTestConstant(v2u64 v2u64_a, v2u32 v2u32_a, v2u16 v2u16_a,
+                      v2u8 v2u8_a) { // Adds integer constants to unsigned vectors.
+  v2u64_a = v2u64_a + 0xFFFFFFFFFFFFFFFF; // This group of four: constants accepted with no diagnostic.
+  v2u32_a = v2u32_a + 0xFFFFFFFF;
+  v2u16_a = v2u16_a + 0xFFFF;
+  v2u8_a = v2u8_a + 0xFF;
+
+  v2u32_a = v2u32_a + 0x1FFFFFFFF; // expected-warning {{implicit conversion from 'long' to 'v2u32' (vector of 2 'unsigned int' values) changes value from 8589934591 to 4294967295}}
+  v2u16_a = v2u16_a + 0x1FFFF;     // expected-warning {{implicit conversion from 'int' to 'v2u16' (vector of 2 'unsigned short' values) changes value from 131071 to 65535}}
+  v2u8_a = v2u8_a + 0x1FF;         // expected-error {{cannot convert between scalar type 'int' and vector type 'v2u8' (vector of 2 'unsigned char' values) as implicit conversion would cause truncation}}
+}
+
+void intTestConstant(v2i64 v2i64_a, v2i32 v2i32_a, v2i16 v2i16_a, v2i8 v2i8_a) {
+  // Legal upper bounds. Each constant is given an explicit scalar type via static_cast.
+  v2i64_a = v2i64_a + static_cast<long long>(0x7FFFFFFFFFFFFFFF); // expected-warning {{'long long' is incompatible with C++98}}
+  v2i32_a = v2i32_a + static_cast<int>(0x7FFFFFFF);
+  v2i16_a = v2i16_a + static_cast<short>(0x7FFF);
+  v2i8_a = v2i8_a + static_cast<char>(0x7F);
+
+  // Legal lower bounds. Note the 64-bit case uses -(2^63 - 1), one above the 64-bit minimum.
+  v2i64_a = v2i64_a + (-9223372036854775807);
+  v2i32_a = v2i32_a + (-2147483648);
+  v2i16_a = v2i16_a + (-32768);
+  v2i8_a = v2i8_a + (-128);
+
+  // One past each bound: above the upper bound all cases warn; below the lower bound the v2i8 case is an error.
+  v2i32_a = v2i32_a + 2147483648; // expected-warning {{implicit conversion from 'long' to 'v2i32' (vector of 2 'int' values) changes value from 2147483648 to -2147483648}}
+  v2i16_a = v2i16_a + 32768;      // expected-warning {{implicit conversion from 'int' to 'v2i16' (vector of 2 'short' values) changes value from 32768 to -32768}}
+  v2i8_a = v2i8_a + 128;          // expected-warning {{implicit conversion from 'int' to 'v2i8' (vector of 2 'char' values) changes value from 128 to -128}}
+
+  v2i32_a = v2i32_a + (-2147483649); // expected-warning {{implicit conversion from 'long' to 'v2i32' (vector of 2 'int' values) changes value from -2147483649 to 2147483647}}
+  v2i16_a = v2i16_a + (-32769);      // expected-warning {{implicit conversion from 'int' to 'v2i16' (vector of 2 'short' values) changes value from -32769 to 32767}}
+  v2i8_a = v2i8_a + (-129);          // expected-error {{cannot convert between scalar type 'int' and vector type 'v2i8' (vector of 2 'char' values) as implicit conversion would cause truncation}}
+}
diff --git a/test/Sema/vector-ops.c b/test/Sema/vector-ops.c
index 9cdd9d2f1748..575f38b972f5 100644
--- a/test/Sema/vector-ops.c
+++ b/test/Sema/vector-ops.c
@@ -13,11 +13,11 @@ void test1(v2u v2ua, v2s v2sa, v2f v2fa) {
   (void)(~v2fa); // expected-error{{invalid argument type 'v2f' (vector of 2 'float' values) to unary}}
 
   // Comparison operators
-  v2ua = (v2ua==v2sa); // expected-warning{{incompatible vector types assigning to 'v2u' (vector of 2 'unsigned int' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
+  v2ua = (v2ua==v2sa); // expected-warning{{incompatible vector types assigning to 'v2u' (vector of 2 'unsigned int' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values}}
   v2sa = (v2ua==v2sa);
 
   // Arrays
-  int array1[v2ua]; // expected-error{{size of array has non-integer type 'v2u' (vector of 2 'unsigned int' values)}}
+  int array1[v2ua]; // expected-error{{size of array has non-integer type 'v2u' (vector of 2 'unsigned int' values}}
   int array2[17];
   // FIXME: error message below needs type!
   (void)(array2[v2ua]); // expected-error{{array subscript is not an integer}}
@@ -28,108 +28,108 @@ void test1(v2u v2ua, v2s v2sa, v2f v2fa) {
 }
 
 void testLogicalVecVec(v2u v2ua, v2s v2sa, v2f v2fa) {
-
   // Logical operators
-  v2ua = v2ua && v2ua; // expected-warning {{incompatible vector types assigning to 'v2u' (vector of 2 'unsigned int' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
-  v2ua = v2ua || v2ua; // expected-warning {{incompatible vector types assigning to 'v2u' (vector of 2 'unsigned int' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
+  v2ua = v2ua && v2ua; // expected-error {{logical expression with vector types 'v2u' (vector of 2 'unsigned int' values) and 'v2u'}}
+  v2ua = v2ua || v2ua; // expected-error {{logical expression with vector types 'v2u' (vector of 2 'unsigned int' values) and 'v2u'}}
 
-  v2ua = v2sa && v2ua; // expected-warning {{incompatible vector types assigning to 'v2u' (vector of 2 'unsigned int' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
-  v2ua = v2sa || v2ua; // expected-warning {{incompatible vector types assigning to 'v2u' (vector of 2 'unsigned int' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
+  v2ua = v2sa && v2ua; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2u' (vector of 2 'unsigned int' values)}}
+  v2ua = v2sa || v2ua; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2u' (vector of 2 'unsigned int' values)}}
 
-  v2ua = v2ua && v2fa; // expected-warning {{incompatible vector types assigning to 'v2u' (vector of 2 'unsigned int' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
-  v2ua = v2ua || v2fa; // expected-warning {{incompatible vector types assigning to 'v2u' (vector of 2 'unsigned int' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
+  v2ua = v2ua && v2fa; // expected-error {{logical expression with vector types 'v2u' (vector of 2 'unsigned int' values) and 'v2f' (vector of 2 'float' values)}}
+  v2ua = v2ua || v2fa; // expected-error {{logical expression with vector types 'v2u' (vector of 2 'unsigned int' values) and 'v2f' (vector of 2 'float' values)}}
 
-  v2ua = v2sa && v2fa; // expected-warning {{incompatible vector types assigning to 'v2u' (vector of 2 'unsigned int' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
-  v2ua = v2sa || v2fa; // expected-warning {{incompatible vector types assigning to 'v2u' (vector of 2 'unsigned int' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
+  v2ua = v2sa && v2fa; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2f' (vector of 2 'float' values)}}
+  v2ua = v2sa || v2fa; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2f' (vector of 2 'float' values)}}
 
-  v2sa = v2sa && v2sa;
-  v2sa = v2sa || v2sa;
+  v2sa = v2sa && v2sa; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2s'}}
+  v2sa = v2sa || v2sa; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2s'}}
 
-  v2sa = v2ua && v2ua;
-  v2sa = v2ua || v2ua;
+  v2sa = v2ua && v2ua; // expected-error {{logical expression with vector types 'v2u' (vector of 2 'unsigned int' values) and 'v2u'}}
+  v2sa = v2ua || v2ua; // expected-error {{logical expression with vector types 'v2u' (vector of 2 'unsigned int' values) and 'v2u'}}
 
-  v2sa = v2sa && v2ua;
-  v2sa = v2sa || v2ua;
+  v2sa = v2sa && v2ua; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2u' (vector of 2 'unsigned int' values)}}
+  v2sa = v2sa || v2ua; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2u' (vector of 2 'unsigned int' values)}}
 
-  v2sa = v2sa && v2fa;
-  v2sa = v2sa || v2fa;
+  v2sa = v2sa && v2fa; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2f' (vector of 2 'float' values)}}
+  v2sa = v2sa || v2fa; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2f' (vector of 2 'float' values)}}
 
-  v2sa = v2ua && v2fa;
-  v2sa = v2ua || v2fa;
+  v2sa = v2ua && v2fa; // expected-error {{logical expression with vector types 'v2u' (vector of 2 'unsigned int' values) and 'v2f' (vector of 2 'float' values)}}
+  v2sa = v2ua || v2fa; // expected-error {{logical expression with vector types 'v2u' (vector of 2 'unsigned int' values) and 'v2f' (vector of 2 'float' values)}}
 
-  v2fa = v2fa && v2fa; // expected-warning {{incompatible vector types assigning to 'v2f' (vector of 2 'float' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
-  v2fa = v2fa || v2fa; // expected-warning {{incompatible vector types assigning to 'v2f' (vector of 2 'float' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
+  v2fa = v2fa && v2fa; // expected-error {{logical expression with vector types 'v2f' (vector of 2 'float' values) and 'v2f'}}
+  v2fa = v2fa || v2fa; // expected-error {{logical expression with vector types 'v2f' (vector of 2 'float' values) and 'v2f'}}
 
-  v2fa = v2sa && v2fa; // expected-warning {{incompatible vector types assigning to 'v2f' (vector of 2 'float' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
-  v2fa = v2sa || v2fa; // expected-warning {{incompatible vector types assigning to 'v2f' (vector of 2 'float' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
+  v2fa = v2sa && v2fa; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2f' (vector of 2 'float' values)}}
+  v2fa = v2sa || v2fa; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2f' (vector of 2 'float' values)}}
 
-  v2fa = v2ua && v2fa; // expected-warning {{incompatible vector types assigning to 'v2f' (vector of 2 'float' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
-  v2fa = v2ua || v2fa; // expected-warning {{incompatible vector types assigning to 'v2f' (vector of 2 'float' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
+  v2fa = v2ua && v2fa; // expected-error {{logical expression with vector types 'v2u' (vector of 2 'unsigned int' values) and 'v2f' (vector of 2 'float' values)}}
+  v2fa = v2ua || v2fa; // expected-error {{logical expression with vector types 'v2u' (vector of 2 'unsigned int' values) and 'v2f' (vector of 2 'float' values)}}
 
-  v2fa = v2ua && v2ua; // expected-warning {{incompatible vector types assigning to 'v2f' (vector of 2 'float' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
-  v2fa = v2ua || v2ua; // expected-warning {{incompatible vector types assigning to 'v2f' (vector of 2 'float' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
+  v2fa = v2ua && v2ua; // expected-error {{logical expression with vector types 'v2u' (vector of 2 'unsigned int' values) and 'v2u'}}
+  v2fa = v2ua || v2ua; // expected-error {{logical expression with vector types 'v2u' (vector of 2 'unsigned int' values) and 'v2u'}}
 
-  v2fa = v2sa && v2sa; // expected-warning {{incompatible vector types assigning to 'v2f' (vector of 2 'float' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
-  v2fa = v2sa || v2sa; // expected-warning {{incompatible vector types assigning to 'v2f' (vector of 2 'float' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
+  v2fa = v2sa && v2sa; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2s'}}
+  v2fa = v2sa || v2sa; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2s'}}
 
-  v2fa = v2sa && v2ua; // expected-warning {{incompatible vector types assigning to 'v2f' (vector of 2 'float' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
-  v2fa = v2sa || v2ua; // expected-warning {{incompatible vector types assigning to 'v2f' (vector of 2 'float' values) from '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}}
+  v2fa = v2sa && v2ua; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2u' (vector of 2 'unsigned int' values)}}
+  v2fa = v2sa || v2ua; // expected-error {{logical expression with vector types 'v2s' (vector of 2 'int' values) and 'v2u' (vector of 2 'unsigned int' values)}}
 }
 
 void testLogicalVecScalar(v2u v2ua, v2s v2sa, v2f v2fa) {
-
   unsigned u1;
-  v2ua = v2ua && u1; // expected-error {{cannot convert between vector values of different size ('v2u' (vector of 2 'unsigned int' values) and 'unsigned int')}} expected-error {{invalid operands to binary expression ('v2u' (vector of 2 'unsigned int' values) and 'unsigned int')}}
-  v2ua = v2ua || u1; // expected-error {{cannot convert between vector values of different size ('v2u' (vector of 2 'unsigned int' values) and 'unsigned int')}} expected-error {{invalid operands to binary expression ('v2u' (vector of 2 'unsigned int' values) and 'unsigned int')}}
+  v2ua = v2ua && u1; // expected-error {{logical expression with vector type 'v2u' (vector of 2 'unsigned int' values) and non-vector type 'unsigned int' is only supported in C++}}
+  v2ua = v2ua || u1; // expected-error {{logical expression with vector type 'v2u' (vector of 2 'unsigned int' values) and non-vector type 'unsigned int' is only supported in C++}}
 
-  v2sa = v2sa && u1; // expected-error {{cannot convert between vector values of different size ('v2s' (vector of 2 'int' values) and 'unsigned int')}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'unsigned int')}}
-  v2sa = v2sa || u1; // expected-error {{cannot convert between vector values of different size ('v2s' (vector of 2 'int' values) and 'unsigned int')}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'unsigned int')}}
+  v2sa = v2sa && u1; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2s' (vector of 2 'int' values) as implicit conversion would cause truncation}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'unsigned int')}}
+  v2sa = v2sa || u1; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2s' (vector of 2 'int' values) as implicit conversion would cause truncation}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'unsigned int')}}
 
-  v2ua = v2sa && u1; // expected-error {{cannot convert between vector values of different size ('v2s' (vector of 2 'int' values) and 'unsigned int')}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'unsigned int')}}
-  v2ua = v2sa || u1; // expected-error {{cannot convert between vector values of different size ('v2s' (vector of 2 'int' values) and 'unsigned int')}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'unsigned int')}}
-  v2sa = v2ua && u1; // expected-error {{cannot convert between vector values of different size ('v2u' (vector of 2 'unsigned int' values) and 'unsigned int')}} expected-error {{invalid operands to binary expression ('v2u' (vector of 2 'unsigned int' values) and 'unsigned int')}}
-  v2sa = v2ua || u1; // expected-error {{cannot convert between vector values of different size ('v2u' (vector of 2 'unsigned int' values) and 'unsigned int')}} expected-error {{invalid operands to binary expression ('v2u' (vector of 2 'unsigned int' values) and 'unsigned int')}}
+  v2ua = v2sa && u1; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2s' (vector of 2 'int' values) as implicit conversion would cause truncation}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'unsigned int')}}
+  v2ua = v2sa || u1; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2s' (vector of 2 'int' values) as implicit conversion would cause truncation}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'unsigned int')}}
+  v2sa = v2ua && u1; // expected-error {{logical expression with vector type 'v2u' (vector of 2 'unsigned int' values) and non-vector type 'unsigned int' is only supported in C++}}
+  v2sa = v2ua || u1; // expected-error {{logical expression with vector type 'v2u' (vector of 2 'unsigned int' values) and non-vector type 'unsigned int' is only supported in C++}}
 
-  v2ua = v2fa && u1; // expected-error {{cannot convert between vector values of different size ('v2f' (vector of 2 'float' values) and 'unsigned int')}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'unsigned int')}}
-  v2ua = v2fa || u1; // expected-error {{cannot convert between vector values of different size ('v2f' (vector of 2 'float' values) and 'unsigned int')}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'unsigned int')}}
+  v2ua = v2fa && u1; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2f' (vector of 2 'float' values) as implicit conversion would cause truncation}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'unsigned int')}}
+  v2ua = v2fa || u1; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2f' (vector of 2 'float' values) as implicit conversion would cause truncation}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'unsigned int')}}
 
-  v2sa = v2fa && u1; // expected-error {{cannot convert between vector values of different size ('v2f' (vector of 2 'float' values) and 'unsigned int')}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'unsigned int')}}
-  v2sa = v2fa || u1; // expected-error {{cannot convert between vector values of different size ('v2f' (vector of 2 'float' values) and 'unsigned int')}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'unsigned int')}}
+  v2sa = v2fa && u1; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2f' (vector of 2 'float' values) as implicit conversion would cause truncation}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'unsigned int')}}
+  v2sa = v2fa || u1; // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'v2f' (vector of 2 'float' values) as implicit conversion would cause truncation}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'unsigned int')}}
 
   int s1;
-  v2ua = v2ua && s1; // expected-error {{cannot convert between vector values of different size ('v2u' (vector of 2 'unsigned int' values) and 'int')}} expected-error {{invalid operands to binary expression ('v2u' (vector of 2 'unsigned int' values) and 'int')}}
-  v2ua = v2ua || s1; // expected-error {{cannot convert between vector values of different size ('v2u' (vector of 2 'unsigned int' values) and 'int')}} expected-error {{invalid operands to binary expression ('v2u' (vector of 2 'unsigned int' values) and 'int')}}
+  v2ua = v2ua && s1; // expected-error {{logical expression with vector type 'v2u' (vector of 2 'unsigned int' values) and non-vector type 'int' is only supported in C++}}
+  v2ua = v2ua || s1; // expected-error {{logical expression with vector type 'v2u' (vector of 2 'unsigned int' values) and non-vector type 'int' is only supported in C++}}
 
-  v2sa = v2sa && s1; // expected-error {{cannot convert between vector values of different size ('v2s' (vector of 2 'int' values) and 'int')}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'int')}}
-  v2sa = v2sa || s1; // expected-error {{cannot convert between vector values of different size ('v2s' (vector of 2 'int' values) and 'int')}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'int')}}
+  v2sa = v2sa && s1; // expected-error {{logical expression with vector type 'v2s' (vector of 2 'int' values) and non-vector type 'int' is only supported in C++}}
+  v2sa = v2sa || s1; // expected-error {{logical expression with vector type 'v2s' (vector of 2 'int' values) and non-vector type 'int' is only supported in C++}}
 
-  v2ua = v2sa && s1; // expected-error {{cannot convert between vector values of different size ('v2s' (vector of 2 'int' values) and 'int')}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'int')}}
-  v2ua = v2sa || s1; // expected-error {{cannot convert between vector values of different size ('v2s' (vector of 2 'int' values) and 'int')}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'int')}}
-  v2sa = v2ua && s1; // expected-error {{cannot convert between vector values of different size ('v2u' (vector of 2 'unsigned int' values) and 'int')}} expected-error {{invalid operands to binary expression ('v2u' (vector of 2 'unsigned int' values) and 'int')}}
-  v2sa = v2ua || s1; // expected-error {{cannot convert between vector values of different size ('v2u' (vector of 2 'unsigned int' values) and 'int')}} expected-error {{invalid operands to binary expression ('v2u' (vector of 2 'unsigned int' values) and 'int')}}
+  v2ua = v2sa && s1; // expected-error {{logical expression with vector type 'v2s' (vector of 2 'int' values) and non-vector type 'int' is only supported in C++}}
+  v2ua = v2sa || s1; // expected-error {{logical expression with vector type 'v2s' (vector of 2 'int' values) and non-vector type 'int' is only supported in C++}}
 
-  v2ua = v2fa && s1; // expected-error {{cannot convert between vector values of different size ('v2f' (vector of 2 'float' values) and 'int')}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'int')}}
-  v2ua = v2fa || s1; // expected-error {{cannot convert between vector values of different size ('v2f' (vector of 2 'float' values) and 'int')}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'int')}}
+  v2sa = v2ua && s1; // expected-error {{logical expression with vector type 'v2u' (vector of 2 'unsigned int' values) and non-vector type 'int' is only supported in C++}}
+  v2sa = v2ua || s1; // expected-error {{logical expression with vector type 'v2u' (vector of 2 'unsigned int' values) and non-vector type 'int' is only supported in C++}}
 
-  v2sa = v2fa && s1; // expected-error {{cannot convert between vector values of different size ('v2f' (vector of 2 'float' values) and 'int')}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'int')}}
-  v2sa = v2fa || s1; // expected-error {{cannot convert between vector values of different size ('v2f' (vector of 2 'float' values) and 'int')}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'int')}}
+  v2ua = v2fa && s1; // expected-error {{cannot convert between scalar type 'int' and vector type 'v2f' (vector of 2 'float' values) as implicit conversion would cause truncation}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'int')}}
+  v2ua = v2fa || s1; // expected-error {{cannot convert between scalar type 'int' and vector type 'v2f' (vector of 2 'float' values) as implicit conversion would cause truncation}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'int')}}
+
+  v2sa = v2fa && s1; // expected-error {{cannot convert between scalar type 'int' and vector type 'v2f' (vector of 2 'float' values) as implicit conversion would cause truncation}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'int')}}
+  v2sa = v2fa || s1; // expected-error {{cannot convert between scalar type 'int' and vector type 'v2f' (vector of 2 'float' values) as implicit conversion would cause truncation}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'int')}}
 
   float f1;
-  v2ua = v2ua && f1; // expected-error {{cannot convert between vector values of different size ('v2u' (vector of 2 'unsigned int' values) and 'float')}} expected-error {{invalid operands to binary expression ('v2u' (vector of 2 'unsigned int' values) and 'float')}}
-  v2ua = v2ua || f1; // expected-error {{cannot convert between vector values of different size ('v2u' (vector of 2 'unsigned int' values) and 'float')}} expected-error {{invalid operands to binary expression ('v2u' (vector of 2 'unsigned int' values) and 'float')}}
+  v2ua = v2ua && f1; // expected-error {{logical expression with vector type 'v2u' (vector of 2 'unsigned int' values) and non-vector type 'float' is only supported in C++}}
+  v2ua = v2ua || f1; // expected-error {{logical expression with vector type 'v2u' (vector of 2 'unsigned int' values) and non-vector type 'float' is only supported in C++}}
 
-  v2sa = v2sa && f1; // expected-error {{cannot convert between vector values of different size ('v2s' (vector of 2 'int' values) and 'float')}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'float')}}
-  v2sa = v2sa || f1; // expected-error {{cannot convert between vector values of different size ('v2s' (vector of 2 'int' values) and 'float')}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'float')}}
+  v2sa = v2sa && f1; // expected-error {{logical expression with vector type 'v2s' (vector of 2 'int' values) and non-vector type 'float' is only supported in C++}}
+  v2sa = v2sa || f1; // expected-error {{logical expression with vector type 'v2s' (vector of 2 'int' values) and non-vector type 'float' is only supported in C++}}
 
-  v2ua = v2sa && f1; // expected-error {{cannot convert between vector values of different size ('v2s' (vector of 2 'int' values) and 'float')}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'float')}}
-  v2ua = v2sa || f1; // expected-error {{cannot convert between vector values of different size ('v2s' (vector of 2 'int' values) and 'float')}} expected-error {{invalid operands to binary expression ('v2s' (vector of 2 'int' values) and 'float')}}
-  v2sa = v2ua && f1; // expected-error {{cannot convert between vector values of different size ('v2u' (vector of 2 'unsigned int' values) and 'float')}} expected-error {{invalid operands to binary expression ('v2u' (vector of 2 'unsigned int' values) and 'float')}}
-  v2sa = v2ua || f1; // expected-error {{cannot convert between vector values of different size ('v2u' (vector of 2 'unsigned int' values) and 'float')}} expected-error {{invalid operands to binary expression ('v2u' (vector of 2 'unsigned int' values) and 'float')}}
+  v2ua = v2sa && f1; // expected-error {{logical expression with vector type 'v2s' (vector of 2 'int' values) and non-vector type 'float' is only supported in C++}}
+  v2ua = v2sa || f1; // expected-error {{logical expression with vector type 'v2s' (vector of 2 'int' values) and non-vector type 'float' is only supported in C++}}
 
-  v2ua = v2fa && f1; // expected-error {{cannot convert between vector values of different size ('v2f' (vector of 2 'float' values) and 'float')}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'float')}}
-  v2ua = v2fa || f1; // expected-error {{cannot convert between vector values of different size ('v2f' (vector of 2 'float' values) and 'float')}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'float')}}
+  v2sa = v2ua && f1; // expected-error {{logical expression with vector type 'v2u' (vector of 2 'unsigned int' values) and non-vector type 'float' is only supported in C++}}
+  v2sa = v2ua || f1; // expected-error {{logical expression with vector type 'v2u' (vector of 2 'unsigned int' values) and non-vector type 'float' is only supported in C++}}
 
-  v2sa = v2fa && f1; // expected-error {{cannot convert between vector values of different size ('v2f' (vector of 2 'float' values) and 'float')}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'float')}}
-  v2sa = v2fa || f1; // expected-error {{cannot convert between vector values of different size ('v2f' (vector of 2 'float' values) and 'float')}} expected-error {{invalid operands to binary expression ('v2f' (vector of 2 'float' values) and 'float')}}
+  v2ua = v2fa && f1; // expected-error {{logical expression with vector type 'v2f' (vector of 2 'float' values) and non-vector type 'float' is only supported in C++}}
+  v2ua = v2fa || f1; // expected-error {{logical expression with vector type 'v2f' (vector of 2 'float' values) and non-vector type 'float' is only supported in C++}}
+
+  v2sa = v2fa && f1; // expected-error {{logical expression with vector type 'v2f' (vector of 2 'float' values) and non-vector type 'float' is only supported in C++}}
+  v2sa = v2fa || f1; // expected-error {{logical expression with vector type 'v2f' (vector of 2 'float' values) and non-vector type 'float' is only supported in C++}}
 
 }
diff --git a/test/Sema/zvector.c b/test/Sema/zvector.c
index d1cf1aa01f4b..740163fcd9d5 100644
--- a/test/Sema/zvector.c
+++ b/test/Sema/zvector.c
@@ -326,14 +326,14 @@ void foo(void)
   bc = bc + sc2; // expected-error {{incompatible type}}
   bc = sc + bc2; // expected-error {{incompatible type}}
 
-  sc = sc + sc_scalar; // expected-error {{cannot convert}}
-  sc = sc + uc_scalar; // expected-error {{cannot convert}}
-  sc = sc_scalar + sc; // expected-error {{cannot convert}}
-  sc = uc_scalar + sc; // expected-error {{cannot convert}}
-  uc = uc + sc_scalar; // expected-error {{cannot convert}}
-  uc = uc + uc_scalar; // expected-error {{cannot convert}}
-  uc = sc_scalar + uc; // expected-error {{cannot convert}}
-  uc = uc_scalar + uc; // expected-error {{cannot convert}}
+  sc = sc + sc_scalar;
+  sc = sc + uc_scalar; // expected-error {{cannot convert between scalar type 'unsigned char' and vector type '__vector signed char' (vector of 16 'signed char' values) as implicit conversion would cause truncation}}
+  sc = sc_scalar + sc;
+  sc = uc_scalar + sc; // expected-error {{cannot convert between scalar type 'unsigned char' and vector type '__vector signed char' (vector of 16 'signed char' values) as implicit conversion would cause truncation}}
+  uc = uc + sc_scalar; // expected-error {{implicit conversion changes signedness: 'signed char' to '__vector unsigned char' (vector of 16 'unsigned char' values)}}
+  uc = uc + uc_scalar;
+  uc = sc_scalar + uc; // expected-error {{implicit conversion changes signedness: 'signed char' to '__vector unsigned char' (vector of 16 'unsigned char' values)}}
+  uc = uc_scalar + uc;
 
   ss = ss + ss2;
   us = us + us2;
@@ -368,10 +368,10 @@ void foo(void)
   sc += sl2; // expected-error {{cannot convert}}
   sc += fd2; // expected-error {{cannot convert}}
 
-  sc += sc_scalar; // expected-error {{cannot convert}}
-  sc += uc_scalar; // expected-error {{cannot convert}}
-  uc += sc_scalar; // expected-error {{cannot convert}}
-  uc += uc_scalar; // expected-error {{cannot convert}}
+  sc += sc_scalar;
+  sc += uc_scalar; // expected-error {{cannot convert between scalar type 'unsigned char' and vector type '__vector signed char' (vector of 16 'signed char' values) as implicit conversion would cause truncation}}
+  uc += sc_scalar; // expected-error {{implicit conversion changes signedness: 'signed char' to '__vector unsigned char' (vector of 16 'unsigned char' values)}}
+  uc += uc_scalar;
 
   ss += ss2;
   us += us2;
diff --git a/test/SemaCXX/constructor-initializer.cpp b/test/SemaCXX/constructor-initializer.cpp
index c5de33cedb90..102ff1e80d03 100644
--- a/test/SemaCXX/constructor-initializer.cpp
+++ b/test/SemaCXX/constructor-initializer.cpp
@@ -302,3 +302,22 @@ namespace PR14073 {
   struct S2 { union { union { int n; }; char c; }; S2() : n(n) {} };  // expected-warning {{field 'n' is uninitialized when used here}}
   struct S3 { struct { int n; }; S3() : n(n) {} };  // expected-warning {{field 'n' is uninitialized when used here}}
 }
+
+namespace PR10758 {
+struct A;
+struct B {
+  B (A const &); // expected-note 2 {{candidate constructor not viable: no known conversion from 'const PR10758::B' to 'const PR10758::A &' for 1st argument}}
+  B (B &); // expected-note 2 {{candidate constructor not viable: 1st argument ('const PR10758::B') would lose const qualifier}}
+};
+struct A {
+  A (B); // expected-note 2 {{passing argument to parameter here}}
+};
+
+B f(B const &b) {
+  return b; // expected-error {{no matching constructor for initialization of 'PR10758::B'}}
+}
+
+A f2(const B &b) {
+  return b; // expected-error {{no matching constructor for initialization of 'PR10758::B'}}
+}
+}
diff --git a/test/SemaCXX/cxx1y-generic-lambdas.cpp b/test/SemaCXX/cxx1y-generic-lambdas.cpp
index 1993c6e1853d..9f3c77591a86 100644
--- a/test/SemaCXX/cxx1y-generic-lambdas.cpp
+++ b/test/SemaCXX/cxx1y-generic-lambdas.cpp
@@ -986,3 +986,10 @@ class Enclosing3 {
   );
 };
 }
+
+namespace PR32638 {
+ // https://bugs.llvm.org/show_bug.cgi?id=32638
+ void test() {
+    [](auto x) noexcept(noexcept(x)) { } (0);
+ }
+}
\ No newline at end of file
diff --git a/test/SemaCXX/cxx1y-variable-templates_top_level.cpp b/test/SemaCXX/cxx1y-variable-templates_top_level.cpp
index b4963646838c..a78548b6f128 100644
--- a/test/SemaCXX/cxx1y-variable-templates_top_level.cpp
+++ b/test/SemaCXX/cxx1y-variable-templates_top_level.cpp
@@ -9,7 +9,7 @@
 #endif
 
 template<typename T> 
-T pi = T(3.1415926535897932385); // expected-note {{template is declared here}}
+T pi = T(3.1415926535897932385); // expected-note 2{{declared here}}
 
 template<typename T> 
 CONST T cpi = T(3.1415926535897932385); // expected-note {{template is declared here}}
@@ -58,10 +58,9 @@ namespace use_in_top_level_funcs {
 namespace shadow {
   void foo() {
     int ipi0 = pi<int>;
-    int pi;
+    int pi; // expected-note {{found}}
     int a = pi;
-    int ipi = pi<int>;  // expected-error {{expected '(' for function-style cast or type construction}} \
-                        // expected-error {{expected expression}}
+    int ipi = pi<int>;  // expected-error {{'pi' does not name a template but is followed by template arguments; did you mean '::pi'?}}
   }
 }
 
diff --git a/test/SemaCXX/enable_if.cpp b/test/SemaCXX/enable_if.cpp
index 9a06d3866110..93014f50d508 100644
--- a/test/SemaCXX/enable_if.cpp
+++ b/test/SemaCXX/enable_if.cpp
@@ -499,3 +499,17 @@ void run() {
   }
 }
 }
+
+namespace TypeOfFn {
+  template <typename T, typename U>
+  struct is_same;
+
+  template <typename T> struct is_same<T, T> {
+    enum { value = 1 };
+  };
+
+  void foo(int a) __attribute__((enable_if(a, "")));
+  void foo(float a) __attribute__((enable_if(1, "")));
+
+  static_assert(is_same<__typeof__(foo)*, decltype(&foo)>::value, "");
+}
diff --git a/test/SemaCXX/for-range-examples.cpp b/test/SemaCXX/for-range-examples.cpp
index 08a9982c6378..d6b527ff8a50 100644
--- a/test/SemaCXX/for-range-examples.cpp
+++ b/test/SemaCXX/for-range-examples.cpp
@@ -241,3 +241,37 @@ namespace pr18587 {
     }
   }
 }
+
+namespace PR32933 {
+// https://bugs.llvm.org/show_bug.cgi?id=32933
+void foo ()
+{ 
+  int b = 1, a[b];
+  a[0] = 0;
+  [&] { for (int c : a) 0; } ();
+}
+
+
+int foo(int b) {
+  int varr[b][(b+=8)];
+  b = 15; 
+  [&] {
+    int i = 0;
+    for (auto &c : varr) 
+    {
+      c[0] = ++b;
+    }
+    [&] {
+      int i = 0;
+      for (auto &c : varr) {
+        int j = 0;
+        for(auto &c2 : c) {
+          ++j;
+        }
+        ++i;
+      }
+    }();
+  }();
+  return b;
+}
+}
\ No newline at end of file
diff --git a/test/SemaCXX/invalid-member-expr.cpp b/test/SemaCXX/invalid-member-expr.cpp
index 172be6b8266d..fd50d328da67 100644
--- a/test/SemaCXX/invalid-member-expr.cpp
+++ b/test/SemaCXX/invalid-member-expr.cpp
@@ -53,9 +53,7 @@ namespace test3 {
 namespace rdar11293995 {
 
 struct Length {
-  explicit Length(PassRefPtr<CalculationValue>); // expected-error {{unknown type name}} \
-                    expected-error {{expected ')'}} \
-                    expected-note {{to match this '('}}
+  explicit Length(PassRefPtr<CalculationValue>); // expected-error {{no template named 'PassRefPtr'}} expected-error {{undeclared identifier 'CalculationValue'}}
 };
 
 struct LengthSize {
diff --git a/test/SemaCXX/modules-ts.cppm b/test/SemaCXX/modules-ts.cppm
index 16695f6463a8..d1d7aaa96e6c 100644
--- a/test/SemaCXX/modules-ts.cppm
+++ b/test/SemaCXX/modules-ts.cppm
@@ -17,7 +17,8 @@ static int m; // ok, internal linkage, so no redefinition error
 int n;
 #if TEST >= 2
 // expected-error@-2 {{redefinition of '}}
-// expected-note@-3 {{previous}}
+// expected-note@-3 {{unguarded header; consider using #ifdef guards or #pragma once}}
+// expected-note-re@modules-ts.cppm:1 {{'{{.*}}modules-ts.cppm' included multiple times, additional include site here}}
 #endif
 
 #if TEST == 0
diff --git a/test/SemaCXX/type-traits.cpp b/test/SemaCXX/type-traits.cpp
index 9da59b93c503..919122576222 100644
--- a/test/SemaCXX/type-traits.cpp
+++ b/test/SemaCXX/type-traits.cpp
@@ -1256,7 +1256,7 @@ void is_trivially_copyable2()
   int t33[F(__is_trivially_copyable(ExtDefaulted))];
 
   int t34[T(__is_trivially_copyable(const int))];
-  int t35[F(__is_trivially_copyable(volatile int))];
+  int t35[T(__is_trivially_copyable(volatile int))];
 }
 
 struct CStruct {
diff --git a/test/SemaCXX/typo-correction.cpp b/test/SemaCXX/typo-correction.cpp
index c59ee618f929..2d78f06c5d33 100644
--- a/test/SemaCXX/typo-correction.cpp
+++ b/test/SemaCXX/typo-correction.cpp
@@ -524,13 +524,16 @@ namespace shadowed_template {
 template <typename T> class Fizbin {};  // expected-note {{'::shadowed_template::Fizbin' declared here}}
 class Baz {
    int Fizbin();
-   // TODO: Teach the parser to recover from the typo correction instead of
-   // continuing to treat the template name as an implicit-int declaration.
-   Fizbin<int> qux;  // expected-error {{unknown type name 'Fizbin'; did you mean '::shadowed_template::Fizbin'?}} \
-                     // expected-error {{expected member name or ';' after declaration specifiers}}
+   Fizbin<int> qux;  // expected-error {{no template named 'Fizbin'; did you mean '::shadowed_template::Fizbin'?}}
 };
 }
 
+namespace no_correct_template_id_to_non_template {
+  struct Frobnatz {}; // expected-note {{declared here}}
+  Frobnats fn; // expected-error {{unknown type name 'Frobnats'; did you mean 'Frobnatz'?}}
+  Frobnats<int> fni; // expected-error-re {{no template named 'Frobnats'{{$}}}}
+}
+
 namespace PR18852 {
 void func() {
   struct foo {
diff --git a/test/SemaCXX/vector-no-lax.cpp b/test/SemaCXX/vector-no-lax.cpp
index a85f7f9db060..3cedcb1e8ce5 100644
--- a/test/SemaCXX/vector-no-lax.cpp
+++ b/test/SemaCXX/vector-no-lax.cpp
@@ -4,6 +4,6 @@ typedef int __attribute__((vector_size (16))) vSInt32;
 
 vSInt32 foo (vUInt32 a) {
   vSInt32 b = { 0, 0, 0, 0 };
-  b += a; // expected-error{{cannot convert between vector values}}
+  b += a; // expected-error{{cannot convert between vector type 'vUInt32' (vector of 4 'unsigned int' values) and vector type 'vSInt32' (vector of 4 'int' values) as implicit conversion would cause truncation}}
   return b;
 }
diff --git a/test/SemaCXX/warn-unused-filescoped.cpp b/test/SemaCXX/warn-unused-filescoped.cpp
index 18defee7d04a..93c6bbd7edc9 100644
--- a/test/SemaCXX/warn-unused-filescoped.cpp
+++ b/test/SemaCXX/warn-unused-filescoped.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -Wunused -Wunused-member-function -Wno-unused-local-typedefs -Wno-c++11-extensions -std=c++98 %s
-// RUN: %clang_cc1 -fsyntax-only -verify -Wunused -Wunused-member-function -Wno-unused-local-typedefs -std=c++11 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -Wunused -Wunused-template -Wunused-member-function -Wno-unused-local-typedefs -Wno-c++11-extensions -std=c++98 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -Wunused -Wunused-template -Wunused-member-function -Wno-unused-local-typedefs -std=c++14 %s
 
 #ifdef HEADER
 
@@ -65,7 +65,7 @@ namespace {
   template <> void TS<int>::m() { }  // expected-warning{{unused}}
 
   template <typename T>
-  void tf() { }
+  void tf() { }  // expected-warning{{unused}}
   template <> void tf<int>() { }  // expected-warning{{unused}}
   
   struct VS {
@@ -200,6 +200,18 @@ void bar() { void func() __attribute__((used)); }
 static void func() {}
 }
 
+namespace test9 {
+template<typename T>
+static void completeRedeclChainForTemplateSpecialization() { } // expected-warning {{unused}}
+}
+
+namespace test10 {
+#if __cplusplus >= 201103L
+template<class T>
+constexpr T pi = T(3.14); // expected-warning {{unused}}
+#endif
+}
+
 namespace pr19713 {
 #if __cplusplus >= 201103L
   // FIXME: We should warn on both of these.
diff --git a/test/SemaObjC/method-bad-param.m b/test/SemaObjC/method-bad-param.m
index ad67a34edb00..a7f0745ddbad 100644
--- a/test/SemaObjC/method-bad-param.m
+++ b/test/SemaObjC/method-bad-param.m
@@ -20,6 +20,12 @@
 }
 @end
 
+// Ensure that this function is properly marked as a failure.
+void func_with_bad_call(bar* b, foo* f) {
+  [b cccccc:5]; // expected-warning {{instance method '-cccccc:' not found}}
+                // expected-note@-17 {{receiver is instance of class declared here}}
+}
+
 void somefunc(foo x) {} // expected-error {{interface type 'foo' cannot be passed by value; did you forget * in 'foo'}}
 foo somefunc2() {} // expected-error {{interface type 'foo' cannot be returned by value; did you forget * in 'foo'}}
 
diff --git a/test/SemaObjC/unguarded-availability.m b/test/SemaObjC/unguarded-availability.m
index ae921f4a27b3..071a21ea1b3f 100644
--- a/test/SemaObjC/unguarded-availability.m
+++ b/test/SemaObjC/unguarded-availability.m
@@ -8,7 +8,7 @@
 int func_10_11() AVAILABLE_10_11; // expected-note 4 {{'func_10_11' has been explicitly marked partial here}}
 
 #ifdef OBJCPP
-// expected-note@+2 2 {{marked partial here}}
+// expected-note@+2 6 {{marked partial here}}
 #endif
 int func_10_12() AVAILABLE_10_12; // expected-note 6 {{'func_10_12' has been explicitly marked partial here}}
 
@@ -48,7 +48,7 @@ void star_case() {
   } else
     func_10_11(); // expected-warning{{'func_10_11' is only available on macOS 10.11 or newer}} expected-note{{enclose 'func_10_11' in an @available check to silence this warning}}
 
-  if (@available(macos 10.11, *)) {
+  if (@available(macOS 10.11, *)) {
     if (@available(ios 8, *)) {
       func_10_11();
       func_10_12(); // expected-warning{{'func_10_12' is only available on macOS 10.12 or newer}} expected-note{{enclose}}
@@ -176,7 +176,7 @@ int instantiate_with_availability_attr() {
 }
 
 int instantiate_availability() {
-  if (@available(macos 10.12, *))
+  if (@available(macOS 10.12, *))
     with_availability_attr<int_10_12>();
   else
     with_availability_attr<int_10_12>(); // expected-warning{{'with_availability_attr<int>' is only available on macOS 10.11 or newer}} expected-warning{{'int_10_12' is only available on macOS 10.12 or newer}} expected-note 2 {{enclose}}
@@ -188,4 +188,19 @@ auto topLevelLambda = [] () {
     func_10_12();
 };
 
+void functionInFunction() {
+  func_10_12(); // expected-warning{{'func_10_12' is only available on macOS 10.12 or newer}} expected-note{{@available}}
+  struct DontWarnTwice {
+    void f() {
+      func_10_12(); // expected-warning{{'func_10_12' is only available on macOS 10.12 or newer}} expected-note{{@available}}
+    }
+  };
+  void([] () {
+    func_10_12(); // expected-warning{{'func_10_12' is only available on macOS 10.12 or newer}} expected-note{{@available}}
+  });
+  (void)(^ {
+    func_10_12(); // expected-warning{{'func_10_12' is only available on macOS 10.12 or newer}} expected-note{{@available}}
+  });
+}
+
 #endif
diff --git a/test/SemaObjCXX/interface-return-type.mm b/test/SemaObjCXX/interface-return-type.mm
new file mode 100644
index 000000000000..9fff8610ae0d
--- /dev/null
+++ b/test/SemaObjCXX/interface-return-type.mm
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 %s -verify
+
+@class NSObject;
+template<typename T> struct C {
+      static T f(); // expected-error {{interface type 'NSObject' cannot be returned by value; did you forget * in 'NSObject'?}}
+};
+int g() { NSObject *x = C<NSObject>::f(); } // expected-error {{no member named 'f' in 'C<NSObject>'}} expected-note {{in instantiation of template class 'C<NSObject>' requested here}}
diff --git a/test/SemaObjCXX/is-base-of.mm b/test/SemaObjCXX/is-base-of.mm
new file mode 100644
index 000000000000..9cf16661b0c5
--- /dev/null
+++ b/test/SemaObjCXX/is-base-of.mm
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
+
+@interface NSObj
+@end
+
+@interface NSChild : NSObj
+@end
+
+static_assert(__is_base_of(NSObj, NSChild), "");
+static_assert(!__is_base_of(NSChild, NSObj), "");
+
+static_assert(__is_base_of(NSObj, NSObj), "");
+
+static_assert(!__is_base_of(NSObj *, NSChild *), "");
+static_assert(!__is_base_of(NSChild *, NSObj *), "");
+
+static_assert(__is_base_of(const volatile NSObj, NSChild), "");
+static_assert(__is_base_of(NSObj, const volatile NSChild), "");
+
+@class NSForward; // expected-note{{forward declaration of class}}
+
+static_assert(!__is_base_of(NSForward, NSObj), "");
+static_assert(!__is_base_of(NSObj, NSForward), ""); // expected-error{{incomplete type 'NSForward'}}
+
+static_assert(!__is_base_of(id, NSObj), "");
diff --git a/test/SemaOpenCL/array-init.cl b/test/SemaOpenCL/array-init.cl
new file mode 100644
index 000000000000..d9691d86dd81
--- /dev/null
+++ b/test/SemaOpenCL/array-init.cl
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0
+// expected-no-diagnostics
+
+__kernel void k1(queue_t q1, queue_t q2) {
+  queue_t q[] = {q1, q2};
+}
+
+__kernel void k2(read_only pipe int p) {
+  reserve_id_t i1 = reserve_read_pipe(p, 1);
+  reserve_id_t i2 = reserve_read_pipe(p, 1);
+  reserve_id_t i[] = {i1, i2};
+}
+
+event_t create_event();
+__kernel void k3() {
+  event_t e1 = create_event();
+  event_t e2 = create_event();
+  event_t e[] = {e1, e2};
+}
+
diff --git a/test/SemaOpenCL/storageclass.cl b/test/SemaOpenCL/storageclass.cl
index a93f8244dcbd..f457cfd1d3f6 100644
--- a/test/SemaOpenCL/storageclass.cl
+++ b/test/SemaOpenCL/storageclass.cl
@@ -5,7 +5,7 @@ constant int G2 = 0;
 int G3 = 0;        // expected-error{{program scope variable must reside in constant address space}}
 global int G4 = 0; // expected-error{{program scope variable must reside in constant address space}}
 
-void kernel foo() {
+void kernel foo(int x) {
   // static is not allowed at local scope before CL2.0
   static int S1 = 5;          // expected-error{{variables in function scope cannot be declared static}}
   static constant int S2 = 5; // expected-error{{variables in function scope cannot be declared static}}
@@ -15,6 +15,12 @@ void kernel foo() {
 
   auto int L3 = 7; // expected-error{{OpenCL version 1.2 does not support the 'auto' storage class specifier}}
   global int L4;   // expected-error{{function scope variable cannot be declared in global address space}}
+
+  constant int L5 = x; // expected-error {{initializer element is not a compile-time constant}}
+  global int *constant L6 = &G4;
+  private int *constant L7 = &x; // expected-error {{initializer element is not a compile-time constant}}
+  constant int *constant L8 = &L1;
+  local int *constant L9 = &L2; // expected-error {{initializer element is not a compile-time constant}}
 }
 
 static void kernel bar() { // expected-error{{kernel functions cannot be declared static}}
@@ -29,4 +35,7 @@ void f() {
   }
   global int L3; // expected-error{{function scope variable cannot be declared in global address space}}
   extern constant float L4;
+  extern local float L5; // expected-error{{extern variable must reside in constant address space}}
+  static int L6 = 0;     // expected-error{{variables in function scope cannot be declared static}}
+  static int L7;         // expected-error{{variables in function scope cannot be declared static}}
 }
diff --git a/test/SemaTemplate/deduction-crash.cpp b/test/SemaTemplate/deduction-crash.cpp
index ff7421a910bd..c94c9db94e06 100644
--- a/test/SemaTemplate/deduction-crash.cpp
+++ b/test/SemaTemplate/deduction-crash.cpp
@@ -2,7 +2,7 @@
 
 // Note that the error count below doesn't matter. We just want to
 // make sure that the parser doesn't crash.
-// CHECK: 16 errors
+// CHECK: 17 errors
 
 // PR7511
 template<a>
diff --git a/test/SemaTemplate/default-arguments.cpp b/test/SemaTemplate/default-arguments.cpp
index d3e249db7ee2..b5b042c64a71 100644
--- a/test/SemaTemplate/default-arguments.cpp
+++ b/test/SemaTemplate/default-arguments.cpp
@@ -207,3 +207,19 @@ Y<false> y2;
 
 } // end ns1
 } // end ns PR26134
+
+namespace friends {
+  namespace ns {
+    template<typename> struct A {
+      template<typename> friend void f();
+      template<typename> friend struct X;
+    };
+    template<typename = int> void f(); // expected-warning 0-1{{extension}}
+    template<typename = int> struct X;
+    A<int> a;
+  }
+  namespace ns {
+    void g() { f(); }
+    X<int> *p;
+  }
+}
diff --git a/test/SemaTemplate/explicit-instantiation.cpp b/test/SemaTemplate/explicit-instantiation.cpp
index 010716dd1426..42d9f4d332a9 100644
--- a/test/SemaTemplate/explicit-instantiation.cpp
+++ b/test/SemaTemplate/explicit-instantiation.cpp
@@ -95,7 +95,7 @@ namespace PR7622 {
   struct basic_streambuf;
 
   template<typename,typename>
-  struct basic_streambuf{friend bob<>()}; // expected-error{{unknown type name 'bob'}} \
+  struct basic_streambuf{friend bob<>()}; // expected-error{{no template named 'bob'}} \
                                           // expected-error{{expected member name or ';' after declaration specifiers}}
   template struct basic_streambuf<int>;
 }
diff --git a/test/SemaTemplate/explicit-specialization-member.cpp b/test/SemaTemplate/explicit-specialization-member.cpp
index f302836c7e4b..c0c36808b492 100644
--- a/test/SemaTemplate/explicit-specialization-member.cpp
+++ b/test/SemaTemplate/explicit-specialization-member.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify %s -fcxx-exceptions
 template<typename T>
 struct X0 {
   typedef T* type;
@@ -57,3 +57,12 @@ template<typename T> struct Helper {
 template<typename T> void Helper<T>::func<2>() {} // expected-error {{cannot specialize a member}} \
                                                   // expected-error {{no function template matches}}
 }
+
+namespace SpecLoc {
+  template <typename T> struct A {
+    static int n; // expected-note {{previous}}
+    static void f(); // expected-note {{previous}}
+  };
+  template<> float A<int>::n; // expected-error {{different type}}
+  template<> void A<int>::f() throw(); // expected-error {{does not match}}
+}
diff --git a/test/SemaTemplate/ms-lookup-template-base-classes.cpp b/test/SemaTemplate/ms-lookup-template-base-classes.cpp
index 6afc7091260d..a41248ee1b8e 100644
--- a/test/SemaTemplate/ms-lookup-template-base-classes.cpp
+++ b/test/SemaTemplate/ms-lookup-template-base-classes.cpp
@@ -347,8 +347,7 @@ template <typename T> struct B : A<T> {
 };
 template <typename T> struct C : A<T> {
   // Incorrect form.
-  NameFromBase<T> m; // expected-error {{unknown type name 'NameFromBase'}}
-  //expected-error@-1 {{expected member name or ';' after declaration specifiers}}
+  NameFromBase<T> m; // expected-error {{no template named 'NameFromBase'}}
 };
 }
 
diff --git a/test/SemaTemplate/typo-template-name.cpp b/test/SemaTemplate/typo-template-name.cpp
new file mode 100644
index 000000000000..fe5201a8e26c
--- /dev/null
+++ b/test/SemaTemplate/typo-template-name.cpp
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -std=c++1z %s -verify -Wno-unused
+
+namespace InExpr {
+  namespace A {
+    void typo_first_a(); // expected-note {{found}}
+    template<typename T> void typo_first_b(); // expected-note 2{{declared here}}
+  }
+  void testA() { A::typo_first_a<int>(); } // expected-error {{'typo_first_a' does not name a template but is followed by template arguments; did you mean 'typo_first_b'?}}
+
+  namespace B {
+    void typo_first_b(); // expected-note {{found}}
+  }
+  void testB() { B::typo_first_b<int>(); } // expected-error {{'typo_first_b' does not name a template but is followed by template arguments; did you mean 'A::typo_first_b'?}}
+
+  struct Base {
+    template<typename T> static void foo(); // expected-note 4{{declared here}}
+    int n;
+  };
+  struct Derived : Base {
+    void foo(); // expected-note {{found}}
+  };
+  // We probably don't want to suggest correcting to .Base::foo<int>
+  void testMember() { Derived().foo<int>(); } // expected-error-re {{does not name a template but is followed by template arguments{{$}}}}
+
+  struct Derived2 : Base {
+    void goo(); // expected-note {{found}}
+  };
+  void testMember2() { Derived2().goo<int>(); } // expected-error {{member 'goo' of 'InExpr::Derived2' is not a template; did you mean 'foo'?}}
+
+  void no_correction() {
+    int foo; // expected-note 3{{found}}
+
+    foo<int>(); // expected-error {{'foo' does not name a template but is followed by template arguments; did you mean 'Base::foo'?}}
+    foo<>(); // expected-error {{'foo' does not name a template but is followed by template arguments; did you mean 'Base::foo'?}}
+    foo<Base *>(); // expected-error {{'foo' does not name a template but is followed by template arguments; did you mean 'Base::foo'?}}
+
+    // These are valid expressions.
+    foo<foo; // expected-warning {{self-comparison}}
+    foo<int()>(0);
+    foo<int(), true>(false);
+    foo<Base{}.n;
+  }
+}
diff --git a/tools/c-index-test/c-index-test.c b/tools/c-index-test/c-index-test.c
index 1179fbf39113..1f5d60443197 100644
--- a/tools/c-index-test/c-index-test.c
+++ b/tools/c-index-test/c-index-test.c
@@ -809,6 +809,19 @@ static void PrintCursor(CXCursor Cursor, const char *CommentSchemaFile) {
     if (clang_Cursor_isObjCOptional(Cursor))
       printf(" (@optional)");
 
+    { /* external-symbol info; the outputs are read only when the call succeeds */
+      CXString language;
+      CXString definedIn;
+      unsigned generated;
+      if (clang_Cursor_isExternalSymbol(Cursor, &language, &definedIn,
+                                        &generated)) {
+        printf(" (external lang: %s, defined: %s, gen: %u)", /* %u: unsigned */
+            clang_getCString(language), clang_getCString(definedIn), generated);
+        clang_disposeString(language);
+        clang_disposeString(definedIn);
+      }
+    }
+
     if (Cursor.kind == CXCursor_IBOutletCollectionAttr) {
       CXType T =
         clang_getCanonicalType(clang_getIBOutletCollectionType(Cursor));
diff --git a/tools/clang-import-test/clang-import-test.cpp b/tools/clang-import-test/clang-import-test.cpp
index d7ab18478c32..567a4bb4f0a2 100644
--- a/tools/clang-import-test/clang-import-test.cpp
+++ b/tools/clang-import-test/clang-import-test.cpp
@@ -42,6 +42,10 @@ static llvm::cl::list<std::string>
     Imports("import", llvm::cl::ZeroOrMore,
             llvm::cl::desc("Path to a file containing declarations to import"));
 
+static llvm::cl::opt<bool>
+    Direct("direct", llvm::cl::Optional, // unset => main() uses BuildIndirect
+             llvm::cl::desc("Use the parsed declarations without indirection"));
+
 static llvm::cl::list<std::string>
     ClangArgs("Xcc", llvm::cl::ZeroOrMore,
               llvm::cl::desc("Argument to pass to the CompilerInvocation"),
@@ -172,6 +176,14 @@ BuildCompilerInstance(ArrayRef<const char *> ClangArgv) {
   return Ins;
 }
 
+std::unique_ptr<CompilerInstance>
+BuildCompilerInstance(ArrayRef<std::string> ClangArgs) { // std::string overload
+  std::vector<const char *> ClangArgv(ClangArgs.size()); // filled just below
+  std::transform(ClangArgs.begin(), ClangArgs.end(), ClangArgv.begin(),
+                 [](const std::string &s) -> const char * { return s.data(); });
+  return init_convenience::BuildCompilerInstance(ClangArgv); // const char * form
+}
+
 std::unique_ptr<ASTContext>
 BuildASTContext(CompilerInstance &CI, SelectorTable &ST, Builtin::Context &BC) {
   auto AST = llvm::make_unique<ASTContext>(
@@ -205,6 +217,21 @@ void AddExternalSource(
   CI.getASTContext().getTranslationUnitDecl()->setHasExternalVisibleStorage();
 }
 
+std::unique_ptr<CompilerInstance> BuildIndirect(std::unique_ptr<CompilerInstance> &CI) {
+  std::vector<const char *> ClangArgv(ClangArgs.size()); // from the "Xcc" options
+  std::transform(ClangArgs.begin(), ClangArgs.end(), ClangArgv.begin(),
+                 [](const std::string &s) -> const char * { return s.data(); });
+  std::unique_ptr<CompilerInstance> IndirectCI =
+      init_convenience::BuildCompilerInstance(ClangArgv);
+  auto ST = llvm::make_unique<SelectorTable>();
+  auto BC = llvm::make_unique<Builtin::Context>();
+  std::unique_ptr<ASTContext> AST =
+      init_convenience::BuildASTContext(*IndirectCI, *ST, *BC);
+  IndirectCI->setASTContext(AST.release()); // NOTE(review): ST/BC die at return;
+  AddExternalSource(*IndirectCI, CI);       // confirm AST keeps no refs to them
+  return IndirectCI; // new instance; CI was passed to AddExternalSource above
+}
+
 llvm::Error ParseSource(const std::string &Path, CompilerInstance &CI,
                         CodeGenerator &CG) {
   SourceManager &SM = CI.getSourceManager();
@@ -231,7 +258,8 @@ Parse(const std::string &Path,
   std::unique_ptr<ASTContext> AST =
       init_convenience::BuildASTContext(*CI, *ST, *BC);
   CI->setASTContext(AST.release());
-  AddExternalSource(*CI, Imports);
+  if (Imports.size())
+    AddExternalSource(*CI, Imports);
 
   auto LLVMCtx = llvm::make_unique<llvm::LLVMContext>();
   std::unique_ptr<CodeGenerator> CG =
@@ -268,8 +296,21 @@ int main(int argc, const char **argv) {
       ImportCIs.push_back(std::move(*ImportCI));
     }
   }
+  std::vector<std::unique_ptr<CompilerInstance>> IndirectCIs;
+  if (!Direct) {
+    for (auto &ImportCI : ImportCIs) {
+      llvm::Expected<std::unique_ptr<CompilerInstance>> IndirectCI =
+          BuildIndirect(ImportCI);
+      if (auto E = IndirectCI.takeError()) {
+        llvm::errs() << llvm::toString(std::move(E));
+        exit(-1);
+      } else {
+        IndirectCIs.push_back(std::move(*IndirectCI));
+      }
+    }
+  }
   llvm::Expected<std::unique_ptr<CompilerInstance>> ExpressionCI =
-      Parse(Expression, ImportCIs);
+      Parse(Expression, Direct ? ImportCIs : IndirectCIs);
   if (auto E = ExpressionCI.takeError()) {
     llvm::errs() << llvm::toString(std::move(E));
     exit(-1);
@@ -277,3 +318,4 @@ int main(int argc, const char **argv) {
     return 0;
   }
 }
+
diff --git a/tools/libclang/CIndex.cpp b/tools/libclang/CIndex.cpp
index c251d83e2097..9c795ae9c5b7 100644
--- a/tools/libclang/CIndex.cpp
+++ b/tools/libclang/CIndex.cpp
@@ -7479,6 +7479,35 @@ unsigned clang_Cursor_isVariadic(CXCursor C) {
   return 0;
 }
 
+unsigned clang_Cursor_isExternalSymbol(CXCursor C,
+                                     CXString *language, CXString *definedIn,
+                                     unsigned *isGenerated) {
+  if (!clang_isDeclaration(C.kind)) // only declaration cursors are considered
+    return 0;
+
+  const Decl *D = getCursorDecl(C);
+
+  auto getExternalSymAttr = [](const Decl *D) -> ExternalSourceSymbolAttr* {
+    if (auto *attr = D->getAttr<ExternalSourceSymbolAttr>()) // on D itself
+      return attr;
+    if (auto *dcd = dyn_cast<Decl>(D->getDeclContext())) { // else on its context
+      if (auto *attr = dcd->getAttr<ExternalSourceSymbolAttr>())
+        return attr;
+    }
+    return nullptr;
+  };
+  if (auto *attr = getExternalSymAttr(D)) {
+    if (language) // every out-parameter is optional; null ones are skipped
+      *language = cxstring::createDup(attr->getLanguage());
+    if (definedIn)
+      *definedIn = cxstring::createDup(attr->getDefinedIn());
+    if (isGenerated)
+      *isGenerated = attr->getGeneratedDeclaration();
+    return 1; // attribute found
+  }
+  return 0; // no attribute: out-parameters are left unwritten
+}
+
 CXSourceRange clang_Cursor_getCommentRange(CXCursor C) {
   if (!clang_isDeclaration(C.kind))
     return clang_getNullRange();
diff --git a/tools/libclang/libclang.exports b/tools/libclang/libclang.exports
index 895dd804b008..d9a406e5741b 100644
--- a/tools/libclang/libclang.exports
+++ b/tools/libclang/libclang.exports
@@ -35,6 +35,7 @@ clang_Cursor_getReceiverType
 clang_Cursor_isAnonymous
 clang_Cursor_isBitField
 clang_Cursor_isDynamicCall
+clang_Cursor_isExternalSymbol
 clang_Cursor_isNull
 clang_Cursor_isObjCOptional
 clang_Cursor_isVariadic
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index 9b833a96f16a..076041406c87 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -342,7 +342,7 @@ TEST_F(FormatTest, FormatIfWithoutCompoundStatement) {
   verifyFormat("if (a)\n  if (b) {\n    f();\n  }\ng();");
 
   FormatStyle AllowsMergedIf = getLLVMStyle();
-  AllowsMergedIf.AlignEscapedNewlinesLeft = true;
+  AllowsMergedIf.AlignEscapedNewlines = FormatStyle::ENAS_Left;
   AllowsMergedIf.AllowShortIfStatementsOnASingleLine = true;
   verifyFormat("if (a)\n"
                "  // comment\n"
@@ -2106,7 +2106,7 @@ TEST_F(FormatTest, LayoutStatementsAroundPreprocessorDirectives) {
 
   verifyIncompleteFormat("void f(\n"
                          "#if A\n"
-                         "    );\n"
+                         ");\n"
                          "#else\n"
                          "#endif");
 }
@@ -2591,6 +2591,60 @@ TEST_F(FormatTest, BreakingBeforeNonAssigmentOperators) {
                Style);
 }
 
+TEST_F(FormatTest, AllowBinPackingInsideArguments) {
+  FormatStyle Style = getLLVMStyle();
+  Style.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment;
+  Style.BinPackArguments = false; // only the call's arguments are unpacked
+  Style.ColumnLimit = 40; // narrow limit so every call below must wrap
+  verifyFormat("void test() {\n"
+               "  someFunction(\n"
+               "      this + argument + is + quite\n"
+               "      + long + so + it + gets + wrapped\n"
+               "      + but + remains + bin - packed);\n"
+               "}",
+               Style);
+  verifyFormat("void test() {\n"
+               "  someFunction(arg1,\n"
+               "               this + argument + is\n"
+               "                   + quite + long + so\n"
+               "                   + it + gets + wrapped\n"
+               "                   + but + remains + bin\n"
+               "                   - packed,\n"
+               "               arg3);\n"
+               "}",
+               Style);
+  verifyFormat("void test() {\n"
+               "  someFunction(\n"
+               "      arg1,\n"
+               "      this + argument + has\n"
+               "          + anotherFunc(nested,\n"
+               "                        calls + whose\n"
+               "                            + arguments\n"
+               "                            + are + also\n"
+               "                            + wrapped,\n"
+               "                        in + addition)\n"
+               "          + to + being + bin - packed,\n"
+               "      arg3);\n"
+               "}",
+               Style);
+
+  Style.BreakBeforeBinaryOperators = FormatStyle::BOS_None; // operators trail
+  verifyFormat("void test() {\n"
+               "  someFunction(\n"
+               "      arg1,\n"
+               "      this + argument + has +\n"
+               "          anotherFunc(nested,\n"
+               "                      calls + whose +\n"
+               "                          arguments +\n"
+               "                          are + also +\n"
+               "                          wrapped,\n"
+               "                      in + addition) +\n"
+               "          to + being + bin - packed,\n"
+               "      arg3);\n"
+               "}",
+               Style);
+}
+
 TEST_F(FormatTest, ConstructorInitializers) {
   verifyFormat("Constructor() : Initializer(FitsOnTheLine) {}");
   verifyFormat("Constructor() : Inttializer(FitsOnTheLine) {}",
@@ -4421,7 +4475,7 @@ TEST_F(FormatTest, WrapsTemplateDeclarations) {
   EXPECT_EQ("static_cast<A< //\n"
             "    B> *>(\n"
             "\n"
-            "    );",
+            ");",
             format("static_cast<A<//\n"
                    "    B>*>(\n"
                    "\n"
@@ -6423,7 +6477,7 @@ TEST_F(FormatTest, BreaksStringLiterals) {
   EXPECT_EQ("\"some text other\";", format("\"some text other\";", Style));
 
   FormatStyle AlignLeft = getLLVMStyleWithColumns(12);
-  AlignLeft.AlignEscapedNewlinesLeft = true;
+  AlignLeft.AlignEscapedNewlines = FormatStyle::ENAS_Left;
   EXPECT_EQ("#define A \\\n"
             "  \"some \" \\\n"
             "  \"text \" \\\n"
@@ -6513,7 +6567,7 @@ TEST_F(FormatTest, BreaksStringLiteralsWithin_TMacro) {
             "#if !TEST\n"
             "    _T(\"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXn\")\n"
             "#endif\n"
-            "    );",
+            ");",
             format("f(\n"
                    "#if !TEST\n"
                    "_T(\"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXn\")\n"
@@ -6824,7 +6878,7 @@ TEST_F(FormatTest, ConfigurableUseOfTab) {
   FormatStyle Tab = getLLVMStyleWithColumns(42);
   Tab.IndentWidth = 8;
   Tab.UseTab = FormatStyle::UT_Always;
-  Tab.AlignEscapedNewlinesLeft = true;
+  Tab.AlignEscapedNewlines = FormatStyle::ENAS_Left;
 
   EXPECT_EQ("if (aaaaaaaa && // q\n"
             "    bb)\t\t// w\n"
@@ -7605,14 +7659,21 @@ TEST_F(FormatTest, AlignConsecutiveAssignments) {
                    "int oneTwoThree = 123;\n"
                    "int oneTwo = 12;",
                    Alignment));
-  Alignment.AlignEscapedNewlinesLeft = true;
+  Alignment.AlignEscapedNewlines = FormatStyle::ENAS_DontAlign;
+  verifyFormat("#define A \\\n"
+               "  int aaaa       = 12; \\\n"
+               "  int b          = 23; \\\n"
+               "  int ccc        = 234; \\\n"
+               "  int dddddddddd = 2345;",
+               Alignment);
+  Alignment.AlignEscapedNewlines = FormatStyle::ENAS_Left;
   verifyFormat("#define A               \\\n"
                "  int aaaa       = 12;  \\\n"
                "  int b          = 23;  \\\n"
                "  int ccc        = 234; \\\n"
                "  int dddddddddd = 2345;",
                Alignment);
-  Alignment.AlignEscapedNewlinesLeft = false;
+  Alignment.AlignEscapedNewlines = FormatStyle::ENAS_Right;
   verifyFormat("#define A                                                      "
                "                \\\n"
                "  int aaaa       = 12;                                         "
@@ -7879,14 +7940,21 @@ TEST_F(FormatTest, AlignConsecutiveDeclarations) {
                    "}",
                    Alignment));
   Alignment.AlignConsecutiveAssignments = false;
-  Alignment.AlignEscapedNewlinesLeft = true;
+  Alignment.AlignEscapedNewlines = FormatStyle::ENAS_DontAlign;
+  verifyFormat("#define A \\\n"
+               "  int       aaaa = 12; \\\n"
+               "  float     b = 23; \\\n"
+               "  const int ccc = 234; \\\n"
+               "  unsigned  dddddddddd = 2345;",
+               Alignment);
+  Alignment.AlignEscapedNewlines = FormatStyle::ENAS_Left;
   verifyFormat("#define A              \\\n"
                "  int       aaaa = 12; \\\n"
                "  float     b = 23;    \\\n"
                "  const int ccc = 234; \\\n"
                "  unsigned  dddddddddd = 2345;",
                Alignment);
-  Alignment.AlignEscapedNewlinesLeft = false;
+  Alignment.AlignEscapedNewlines = FormatStyle::ENAS_Right;
   Alignment.ColumnLimit = 30;
   verifyFormat("#define A                    \\\n"
                "  int       aaaa = 12;       \\\n"
@@ -8671,7 +8739,6 @@ TEST_F(FormatTest, GetsCorrectBasedOnStyle) {
 TEST_F(FormatTest, ParsesConfigurationBools) {
   FormatStyle Style = {};
   Style.Language = FormatStyle::LK_Cpp;
-  CHECK_PARSE_BOOL(AlignEscapedNewlinesLeft);
   CHECK_PARSE_BOOL(AlignOperands);
   CHECK_PARSE_BOOL(AlignTrailingComments);
   CHECK_PARSE_BOOL(AlignConsecutiveAssignments);
@@ -8794,6 +8861,19 @@ TEST_F(FormatTest, ParsesConfiguration) {
   CHECK_PARSE("AlignAfterOpenBracket: true", AlignAfterOpenBracket,
               FormatStyle::BAS_Align);
 
+  Style.AlignEscapedNewlines = FormatStyle::ENAS_Left;
+  CHECK_PARSE("AlignEscapedNewlines: DontAlign", AlignEscapedNewlines,
+              FormatStyle::ENAS_DontAlign);
+  CHECK_PARSE("AlignEscapedNewlines: Left", AlignEscapedNewlines,
+              FormatStyle::ENAS_Left);
+  CHECK_PARSE("AlignEscapedNewlines: Right", AlignEscapedNewlines,
+              FormatStyle::ENAS_Right);
+  // For backward compatibility:
+  CHECK_PARSE("AlignEscapedNewlinesLeft: true", AlignEscapedNewlines,
+              FormatStyle::ENAS_Left);
+  CHECK_PARSE("AlignEscapedNewlinesLeft: false", AlignEscapedNewlines,
+              FormatStyle::ENAS_Right);
+
   Style.UseTab = FormatStyle::UT_ForIndentation;
   CHECK_PARSE("UseTab: Never", UseTab, FormatStyle::UT_Never);
   CHECK_PARSE("UseTab: ForIndentation", UseTab, FormatStyle::UT_ForIndentation);
@@ -9559,7 +9639,7 @@ TEST_F(FormatTest, FormatsLambdas) {
   // Other corner cases.
   verifyFormat("void f() {\n"
                "  bar([]() {} // Did not respect SpacesBeforeTrailingComments\n"
-               "      );\n"
+               "  );\n"
                "}");
 
   // Lambdas created through weird macros.
diff --git a/unittests/Format/FormatTestJS.cpp b/unittests/Format/FormatTestJS.cpp
index 9144fe17e9ec..ff28f304be99 100644
--- a/unittests/Format/FormatTestJS.cpp
+++ b/unittests/Format/FormatTestJS.cpp
@@ -367,6 +367,25 @@ TEST_F(FormatTestJS, GoogScopes) {
                "});");
 }
 
+TEST_F(FormatTestJS, IIFEs) {
+  // Call parens inside the wrapping parens, no semicolon; body not indented.
+  verifyFormat("(function() {\n"
+               "var a = 1;\n"
+               "}())");
+  // Call parens after the wrapping parens; no trailing semicolon.
+  verifyFormat("(function() {\n"
+               "var b = 2;\n"
+               "})()");
+  // Call parens inside the wrapping parens; with a trailing semicolon.
+  verifyFormat("(function() {\n"
+               "var c = 3;\n"
+               "}());");
+  // Call parens after the wrapping parens; with a trailing semicolon.
+  verifyFormat("(function() {\n"
+               "var d = 4;\n"
+               "})();");
+}
+
 TEST_F(FormatTestJS, GoogModules) {
   verifyFormat("goog.module('this.is.really.absurdly.long');",
                getGoogleJSStyleWithColumns(40));
@@ -451,6 +470,16 @@ TEST_F(FormatTestJS, FormatsFreestandingFunctions) {
                "  inner2(a, b);\n"
                "}");
   verifyFormat("function f() {}");
+  verifyFormat("function aFunction() {}\n"
+               "(function f() {\n"
+               "  var x = 1;\n"
+               "}());\n");
+  // Known issue: this should wrap after {}, but calculateBraceTypes
+  // misclassifies the first braces as a BK_BracedInit.
+  verifyFormat("function aFunction(){} {\n"
+               "  let x = 1;\n"
+               "  console.log(x);\n"
+               "}\n");
 }
 
 TEST_F(FormatTestJS, GeneratorFunctions) {
@@ -519,6 +548,39 @@ TEST_F(FormatTestJS, AsyncFunctions) {
                "  // Comment.\n"
                "  return async.then();\n"
                "}\n");
+  verifyFormat("for async (const x of y) {\n"
+               "  console.log(x);\n"
+               "}\n");
+  verifyFormat("function asyncLoop() {\n"
+               "  for async (const x of y) {\n"
+               "    console.log(x);\n"
+               "  }\n"
+               "}\n");
+
+}
+
+TEST_F(FormatTestJS, FunctionParametersTrailingComma) {
+  verifyFormat("function trailingComma(\n"
+               "    p1,\n"
+               "    p2,\n"
+               "    p3,\n"
+               ") {\n"
+               "  a;  //\n"
+               "}\n", // presumably: expected output above, input below
+               "function trailingComma(p1, p2, p3,) {\n"
+               "  a;  //\n"
+               "}\n");
+  verifyFormat("trailingComma(\n"
+               "    p1,\n"
+               "    p2,\n"
+               "    p3,\n"
+               ");\n", // call-site variant of the same trailing comma
+               "trailingComma(p1, p2, p3,);\n");
+  verifyFormat("trailingComma(\n"
+               "    p1  // hello\n"
+               ");\n", // no trailing comma here, only a trailing comment
+               "trailingComma(p1 // hello\n"
+               ");\n");
+}
 
 TEST_F(FormatTestJS, ArrayLiterals) {
@@ -662,7 +724,7 @@ TEST_F(FormatTestJS, FunctionLiterals) {
                "})\n"
                "    .doSomethingElse(\n"
                "        // break\n"
-               "        );");
+               "    );");
 
   Style.ColumnLimit = 33;
   verifyFormat("f({a: function() { return 1; }});", Style);
@@ -829,7 +891,7 @@ TEST_F(FormatTestJS, ArrowFunctions) {
                "})\n"
                "    .doSomethingElse(\n"
                "        // break\n"
-               "        );");
+               "    );");
 }
 
 TEST_F(FormatTestJS, ReturnStatements) {
@@ -1770,6 +1832,8 @@ TEST_F(FormatTestJS, NonNullAssertionOperator) {
       "            .foo()!\n"
       "            .foo()!;\n",
       getGoogleJSStyleWithColumns(20));
+  verifyFormat("let x = namespace!;\n");
+  verifyFormat("return !!x;\n");
 }
 
 TEST_F(FormatTestJS, Conditional) {
@@ -1785,6 +1849,7 @@ TEST_F(FormatTestJS, ImportComments) {
   verifyFormat("import {x} from 'x';  // from some location",
                getGoogleJSStyleWithColumns(25));
   verifyFormat("// taze: x from 'location'", getGoogleJSStyleWithColumns(10));
+  verifyFormat("/// <reference path=\"some/location\" />", getGoogleJSStyleWithColumns(10));
 }
 
 TEST_F(FormatTestJS, Exponentiation) {
diff --git a/unittests/Format/FormatTestSelective.cpp b/unittests/Format/FormatTestSelective.cpp
index 8046d7fab2dd..9ee87b34c217 100644
--- a/unittests/Format/FormatTestSelective.cpp
+++ b/unittests/Format/FormatTestSelective.cpp
@@ -325,7 +325,7 @@ TEST_F(FormatTestSelective, WrongIndent) {
 }
 
 TEST_F(FormatTestSelective, AlwaysFormatsEntireMacroDefinitions) {
-  Style.AlignEscapedNewlinesLeft = true;
+  Style.AlignEscapedNewlines = FormatStyle::ENAS_Left;
   EXPECT_EQ("int  i;\n"
             "#define A \\\n"
             "  int i;  \\\n"
@@ -467,7 +467,7 @@ TEST_F(FormatTestSelective, ReformatRegionAdjustsIndent) {
 TEST_F(FormatTestSelective, UnderstandsTabs) {
   Style.IndentWidth = 8;
   Style.UseTab = FormatStyle::UT_Always;
-  Style.AlignEscapedNewlinesLeft = true;
+  Style.AlignEscapedNewlines = FormatStyle::ENAS_Left;
   EXPECT_EQ("void f() {\n"
             "\tf();\n"
             "\tg();\n"
diff --git a/unittests/Tooling/RecursiveASTVisitorTest.cpp b/unittests/Tooling/RecursiveASTVisitorTest.cpp
index 7e08f9619c1c..269bdbb34ab1 100644
--- a/unittests/Tooling/RecursiveASTVisitorTest.cpp
+++ b/unittests/Tooling/RecursiveASTVisitorTest.cpp
@@ -52,6 +52,14 @@ TEST(RecursiveASTVisitor, TraverseLambdaBodyCanBeOverridden) {
   EXPECT_TRUE(Visitor.allBodiesHaveBeenTraversed());
 }
 
+TEST(RecursiveASTVisitor, VisitsAttributedLambdaExpr) {
+  LambdaExprVisitor Visitor;
+  Visitor.ExpectMatch("", 1, 12); // presumably line 1, column 12: the '['
+  EXPECT_TRUE(Visitor.runOver(
+      "void f() { [] () __attribute__ (( fastcall )) { return; }(); }",
+      LambdaExprVisitor::Lang_CXX14));
+}
+
 // Matches the (optional) capture-default of a lambda-introducer.
 class LambdaDefaultCaptureVisitor
   : public ExpectedLocationVisitor<LambdaDefaultCaptureVisitor> {
diff --git a/unittests/Tooling/RefactoringCallbacksTest.cpp b/unittests/Tooling/RefactoringCallbacksTest.cpp
index ad8aa8f98feb..e226522a70f4 100644
--- a/unittests/Tooling/RefactoringCallbacksTest.cpp
+++ b/unittests/Tooling/RefactoringCallbacksTest.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/Tooling/RefactoringCallbacks.h"
 #include "RewriterTestContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Tooling/RefactoringCallbacks.h"
 #include "gtest/gtest.h"
 
 namespace clang {
@@ -19,11 +19,10 @@ namespace tooling {
 using namespace ast_matchers;
 
 template <typename T>
-void expectRewritten(const std::string &Code,
-                     const std::string &Expected,
-                     const T &AMatcher,
-                     RefactoringCallback &Callback) {
-  MatchFinder Finder;
+void expectRewritten(const std::string &Code, const std::string &Expected,
+                     const T &AMatcher, RefactoringCallback &Callback) {
+  std::map<std::string, Replacements> FileToReplace;
+  ASTMatchRefactorer Finder(FileToReplace);
   Finder.addMatcher(AMatcher, &Callback);
   std::unique_ptr<tooling::FrontendActionFactory> Factory(
       tooling::newFrontendActionFactory(&Finder));
@@ -31,7 +30,7 @@ void expectRewritten(const std::string &Code,
       << "Parsing error in \"" << Code << "\"";
   RewriterTestContext Context;
   FileID ID = Context.createInMemoryFile("input.cc", Code);
-  EXPECT_TRUE(tooling::applyAllReplacements(Callback.getReplacements(),
+  EXPECT_TRUE(tooling::applyAllReplacements(FileToReplace["input.cc"],
                                             Context.Rewrite));
   EXPECT_EQ(Expected, Context.getRewrittenText(ID));
 }
@@ -61,18 +60,18 @@ TEST(RefactoringCallbacksTest, ReplacesInteger) {
   std::string Code = "void f() { int i = 1; }";
   std::string Expected = "void f() { int i = 2; }";
   ReplaceStmtWithText Callback("id", "2");
-  expectRewritten(Code, Expected, id("id", expr(integerLiteral())),
-                  Callback);
+  expectRewritten(Code, Expected, id("id", expr(integerLiteral())), Callback);
 }
 
 TEST(RefactoringCallbacksTest, ReplacesStmtWithStmt) {
   std::string Code = "void f() { int i = false ? 1 : i * 2; }";
   std::string Expected = "void f() { int i = i * 2; }";
   ReplaceStmtWithStmt Callback("always-false", "should-be");
-  expectRewritten(Code, Expected,
-      id("always-false", conditionalOperator(
-          hasCondition(cxxBoolLiteral(equals(false))),
-          hasFalseExpression(id("should-be", expr())))),
+  expectRewritten(
+      Code, Expected,
+      id("always-false",
+         conditionalOperator(hasCondition(cxxBoolLiteral(equals(false))),
+                             hasFalseExpression(id("should-be", expr())))),
       Callback);
 }
 
@@ -80,10 +79,10 @@ TEST(RefactoringCallbacksTest, ReplacesIfStmt) {
   std::string Code = "bool a; void f() { if (a) f(); else a = true; }";
   std::string Expected = "bool a; void f() { f(); }";
   ReplaceIfStmtWithItsBody Callback("id", true);
-  expectRewritten(Code, Expected,
-      id("id", ifStmt(
-          hasCondition(implicitCastExpr(hasSourceExpression(
-              declRefExpr(to(varDecl(hasName("a"))))))))),
+  expectRewritten(
+      Code, Expected,
+      id("id", ifStmt(hasCondition(implicitCastExpr(hasSourceExpression(
+                   declRefExpr(to(varDecl(hasName("a"))))))))),
       Callback);
 }
 
@@ -92,9 +91,63 @@ TEST(RefactoringCallbacksTest, RemovesEntireIfOnEmptyElse) {
   std::string Expected = "void f() {  }";
   ReplaceIfStmtWithItsBody Callback("id", false);
   expectRewritten(Code, Expected,
-      id("id", ifStmt(hasCondition(cxxBoolLiteral(equals(false))))),
-      Callback);
+                  id("id", ifStmt(hasCondition(cxxBoolLiteral(equals(false))))),
+                  Callback);
 }
 
+TEST(RefactoringCallbacksTest, TemplateJustText) {
+  std::string Code = "void f() { int i = 1; }";
+  std::string Expected = "void f() { FOO }"; // template has no ${...} parts
+  auto Callback = ReplaceNodeWithTemplate::create("id", "FOO");
+  ASSERT_FALSE(Callback.takeError()); // fatal: **Callback below needs a value
+  expectRewritten(Code, Expected, id("id", declStmt()), **Callback);
+}
+
+TEST(RefactoringCallbacksTest, TemplateSimpleSubst) {
+  std::string Code = "void f() { int i = 1; }";
+  std::string Expected = "void f() { long x = 1; }"; // ${init} -> "1"
+  auto Callback = ReplaceNodeWithTemplate::create("decl", "long x = ${init}");
+  ASSERT_FALSE(Callback.takeError()); // fatal: **Callback below needs a value
+  expectRewritten(Code, Expected,
+                  id("decl", varDecl(hasInitializer(id("init", expr())))),
+                  **Callback);
+}
+
+TEST(RefactoringCallbacksTest, TemplateLiteral) {
+  std::string Code = "void f() { int i = 1; }";
+  std::string Expected = "void f() { string x = \"$-1\"; }"; // $$ -> $
+  auto Callback = ReplaceNodeWithTemplate::create("decl",
+                                                  "string x = \"$$-${init}\"");
+  ASSERT_FALSE(Callback.takeError()); // fatal: **Callback below needs a value
+  expectRewritten(Code, Expected,
+                  id("decl", varDecl(hasInitializer(id("init", expr())))),
+                  **Callback);
+}
+
+/// Expects \p E to be a failure whose rendered message equals \p Expected.
+///
+/// llvm::toString consumes \p E and renders whatever payload it carries, so
+/// an unexpected non-StringError payload shows up as a message mismatch
+/// instead of being left unhandled by a StringError-only handler. A success
+/// value renders as the empty string.
+static void ExpectStringError(const std::string &Expected, llvm::Error E) {
+  EXPECT_EQ(Expected, llvm::toString(std::move(E)));
+}
+
+TEST(RefactoringCallbacksTest, TemplateUnterminated) {
+  auto Callback = ReplaceNodeWithTemplate::create("decl",
+                                                  "string x = \"$$-${init\"");
+  ExpectStringError("Unterminated ${...} in replacement template near ${init\"",
+                    Callback.takeError()); // create() is expected to fail
+}
+
+TEST(RefactoringCallbacksTest, TemplateUnknownDollar) {
+  auto Callback = ReplaceNodeWithTemplate::create("decl",
+                                                  "string x = \"$<");
+  ExpectStringError("Invalid $ in replacement template near $<",
+                    Callback.takeError()); // create() is expected to fail
+}
+
+
 } // end namespace ast_matchers
 } // end namespace clang
diff --git a/www/cxx_dr_status.html b/www/cxx_dr_status.html
index a01e0dd8dddb..b03b3f0ef9c2 100644
--- a/www/cxx_dr_status.html
+++ b/www/cxx_dr_status.html
@@ -28,7 +28,7 @@
 <!--*************************************************************************-->
 <h1>C++ Defect Report Support in Clang</h1>
 <!--*************************************************************************-->
-<p>Last updated: $Date: 2017-03-17 22:41:20 +0100 (Fri, 17 Mar 2017) $</p>
+<p>Last updated: $Date: 2017-05-10 00:21:24 +0200 (Wed, 10 May 2017) $</p>
 
 <h2 id="cxxdr">C++ defect report implementation status</h2>
 
@@ -589,7 +589,7 @@
   </tr>
   <tr id="92">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#92">92</a></td>
-    <td>WP</td>
+    <td>CD4</td>
     <td>Should <I>exception-specification</I>s be part of the type system?</td>
     <td class="svn" align="center">Clang 4 (C++17 onwards)</td>
   </tr>
@@ -935,11 +935,11 @@
     <td>Accessibility and ambiguity</td>
     <td class="na" align="center">N/A</td>
   </tr>
-  <tr class="open" id="150">
-    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#150">150</a></td>
-    <td>open</td>
+  <tr id="150">
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#150">150</a></td>
+    <td>DR</td>
     <td>Template template parameters and default arguments</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="151">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#151">151</a></td>
@@ -1310,7 +1310,7 @@ accessible?</td>
   </tr>
   <tr id="212">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#212">212</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Implicit instantiation is not described clearly enough</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -1466,7 +1466,7 @@ accessible?</td>
   </tr>
   <tr id="238">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#238">238</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Precision and accuracy constraints on floating point</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -1490,7 +1490,7 @@ accessible?</td>
   </tr>
   <tr id="242">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#242">242</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Interpretation of old-style casts</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -2019,7 +2019,7 @@ of class templates</td>
   </tr>
   <tr id="330">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#330">330</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Qualification conversions and pointers to arrays of pointers</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -2397,7 +2397,7 @@ of class templates</td>
   </tr>
   <tr id="393">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#393">393</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Pointer to array of unknown bound in template argument list in parameter</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -3017,7 +3017,7 @@ of class templates</td>
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#496">496</a></td>
     <td>CD3</td>
     <td>Is a volatile-qualified type really a POD?</td>
-    <td class="none" align="center">No</td>
+    <td class="none" align="center">Superseded by <a href="#dr2094">dr2094</a></td>
   </tr>
   <tr id="497">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#497">497</a></td>
@@ -3587,7 +3587,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="591">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#591">591</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>When a dependent base class is the current instantiation</td>
     <td class="none" align="center">No</td>
   </tr>
@@ -3695,7 +3695,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="609">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#609">609</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>What is a &#8220;top-level&#8221; cv-qualifier?</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -5735,7 +5735,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="987">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#987">987</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Which declarations introduce namespace members?</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -5939,7 +5939,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1021">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1021">1021</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Definitions of namespace members</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -6509,7 +6509,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1116">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1116">1116</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Aliasing of union members</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -7295,7 +7295,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1247">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1247">1247</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Restriction on alias name appearing in <I>type-id</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -7457,7 +7457,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1274">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1274">1274</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Common nonterminal for <I>expression</I> and <I>braced-init-list</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -7517,7 +7517,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1284">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1284">1284</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Should the lifetime of an array be independent of that of its elements?</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -7565,7 +7565,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1292">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1292">1292</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Dependent calls with <I>braced-init-list</I>s containing a pack expansion</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -7667,7 +7667,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1309">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1309">1309</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Incorrect note regarding lookup of a member of the current instantiation</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -7703,7 +7703,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1315">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1315">1315</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Restrictions on non-type template arguments in partial specializations</td>
     <td class="partial" align="center">Partial</td>
   </tr>
@@ -7841,7 +7841,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1338">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1338">1338</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Aliasing and allocation functions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -7870,8 +7870,8 @@ and <I>POD class</I></td>
     <td align="center">Not resolved</td>
   </tr>
   <tr id="1343">
-    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1343">1343</a></td>
-    <td>tentatively ready</td>
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1343">1343</a></td>
+    <td>DR</td>
     <td>Sequencing of non-class initialization</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -7919,7 +7919,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1351">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1351">1351</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Problems with implicitly-declared <I>exception-specification</I>s</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -7949,7 +7949,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1356">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1356">1356</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Exception specifications of copy assignment operators with virtual bases</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -8159,7 +8159,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1391">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1391">1391</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Conversions to parameter types with non-deduced template arguments</td>
     <td class="partial" align="center">Partial</td>
   </tr>
@@ -8182,8 +8182,8 @@ and <I>POD class</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1395">
-    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1395">1395</a></td>
-    <td>ready</td>
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1395">1395</a></td>
+    <td>DR</td>
     <td>Partial ordering of variadic templates reconsidered</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -8195,7 +8195,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1397">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1397">1397</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Class completeness in non-static data member initializers</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -8489,7 +8489,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1446">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1446">1446</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Member function with no <I>ref-qualifier</I> and non-member function with rvalue reference</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -8603,7 +8603,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1465">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1465">1465</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><TT>noexcept</TT> and <TT>std::bad_array_new_length</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -8615,7 +8615,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1467">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1467">1467</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>List-initialization of aggregate from same-type object</td>
     <td class="full" align="center">Clang 3.7 (C++11 onwards)</td>
   </tr>
@@ -8717,7 +8717,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1484">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1484">1484</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Unused local classes of function templates</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -8753,7 +8753,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1490">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1490">1490</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>List-initialization from a string literal</td>
     <td class="full" align="center">Clang 3.7 (C++11 onwards)</td>
   </tr>
@@ -8765,7 +8765,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1492">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1492">1492</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Exception specifications on template destructors</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -8789,7 +8789,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1496">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1496">1496</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Triviality with deleted and missing default constructors</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -8921,7 +8921,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1518">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1518">1518</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Explicit default constructors and copy-list-initialization</td>
     <td class="svn" align="center">Clang 4</td>
   </tr>
@@ -9125,7 +9125,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1552">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1552">1552</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><I>exception-specification</I>s and defaulted special member functions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9161,7 +9161,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1558">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1558">1558</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Unused arguments in alias template specializations</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9201,11 +9201,11 @@ and <I>POD class</I></td>
     <td>Template argument deduction from an initializer list</td>
     <td class="none" align="center">Unknown</td>
   </tr>
-  <tr class="open" id="1565">
-    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1565">1565</a></td>
-    <td>drafting</td>
+  <tr id="1565">
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_closed.html#1565">1565</a></td>
+    <td>NAD</td>
     <td>Copy elision and lifetime of <TT>initializer_list</TT> underlying array</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1566">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_closed.html#1566">1566</a></td>
@@ -9239,19 +9239,19 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1571">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1571">1571</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>cv-qualification for indirect reference binding via conversion function</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1572">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1572">1572</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Incorrect example for rvalue reference binding via conversion function</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1573">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1573">1573</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Inherited constructor characteristics</td>
     <td class="full" align="center">Clang 3.9</td>
   </tr>
@@ -9347,7 +9347,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1589">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1589">1589</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Ambiguous ranking of list-initialization sequences</td>
     <td class="full" align="center">Clang 3.7 (C++11 onwards)</td>
   </tr>
@@ -9359,7 +9359,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1591">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1591">1591</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Deducing array bound and element type from initializer list</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9389,7 +9389,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1596">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1596">1596</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Non-array objects as <TT>array[1]</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9413,7 +9413,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1600">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1600">1600</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Erroneous reference initialization in example</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9431,7 +9431,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1603">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1603">1603</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Errors resulting from giving unnamed namespaces internal linkage</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9497,13 +9497,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1614">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1614">1614</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Address of pure virtual function vs odr-use</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1615">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1615">1615</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Alignment of types, variables, and members</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9543,11 +9543,11 @@ and <I>POD class</I></td>
     <td>Member initializers in anonymous unions</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="1622">
+  <tr id="1622">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1622">1622</a></td>
-    <td>drafting</td>
+    <td>ready</td>
     <td>Empty aggregate initializer for union</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr class="open" id="1623">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1623">1623</a></td>
@@ -9593,13 +9593,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1630">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1630">1630</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Multiple default constructor templates</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1631">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1631">1631</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Incorrect overload resolution for single-element <I>initializer-list</I></td>
     <td class="full" align="center">Clang 3.7</td>
   </tr>
@@ -9611,7 +9611,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1633">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1633">1633</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Copy-initialization in member initialization</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9641,13 +9641,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1638">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1638">1638</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Declaring an explicit specialization of a scoped enumeration</td>
     <td class="full" align="center">Yes</td>
   </tr>
   <tr id="1639">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1639">1639</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><I>exception-specification</I>s and pointer/pointer-to-member expressions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9683,7 +9683,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1645">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1645">1645</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Identical inheriting constructors via default arguments</td>
     <td class="full" align="center">Clang 3.9</td>
   </tr>
@@ -9725,13 +9725,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1652">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1652">1652</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Object addresses in <TT>constexpr</TT> expressions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1653">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1653">1653</a></td>
-    <td>WP</td>
+    <td>CD4</td>
     <td>Removing deprecated increment of <TT>bool</TT></td>
     <td class="svn" align="center">Clang 4 (C++17 onwards)</td>
   </tr>
@@ -9755,7 +9755,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1657">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1657">1657</a></td>
-    <td>WP</td>
+    <td>CD4</td>
     <td>Attributes for namespaces and enumerators</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9845,7 +9845,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1672">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1672">1672</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Layout compatibility with multiple empty bases</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9873,11 +9873,11 @@ and <I>POD class</I></td>
     <td><TT>auto</TT> return type for allocation and deallocation functions</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="1677">
+  <tr id="1677">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1677">1677</a></td>
-    <td>drafting</td>
+    <td>ready</td>
     <td>Constant initialization via aggregate initialization</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1678">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_closed.html#1678">1678</a></td>
@@ -9911,7 +9911,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1683">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1683">1683</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Incorrect example after <TT>constexpr</TT> changes</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9929,7 +9929,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1686">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1686">1686</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Which variables are &#8220;explicitly declared <TT>const</TT>?&#8221;</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9977,7 +9977,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1694">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1694">1694</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Restriction on reference to temporary as a constant expression</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -9989,7 +9989,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1696">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1696">1696</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Temporary lifetime and non-static data member initializers</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10043,7 +10043,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1705">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1705">1705</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Unclear specification of &#8220;more specialized&#8221;</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10061,7 +10061,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1708">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1708">1708</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>overly-strict requirements for names with C language linkage</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10085,7 +10085,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1712">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1712">1712</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><TT>constexpr</TT> variable template declarations</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10103,7 +10103,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1715">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1715">1715</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Access and inherited constructor templates</td>
     <td class="full" align="center">Clang 3.9</td>
   </tr>
@@ -10127,7 +10127,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1719">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1719">1719</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Layout compatibility and cv-qualification revisited</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10145,7 +10145,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1722">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1722">1722</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Should lambda to function pointer conversion function be <TT>noexcept</TT>?</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10217,7 +10217,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1734">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1734">1734</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Nontrivial deleted copy functions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10229,7 +10229,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1736">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1736">1736</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Inheriting constructor templates in a local class</td>
     <td class="full" align="center">Clang 3.9</td>
   </tr>
@@ -10277,7 +10277,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1744">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1744">1744</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Unordered initialization for variable template specializations</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10301,7 +10301,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1748">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1748">1748</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Placement new with a null pointer</td>
     <td class="full" align="center">Clang 3.7</td>
   </tr>
@@ -10313,25 +10313,25 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1750">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1750">1750</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>&#8220;Argument&#8221; vs &#8220;parameter&#8221;</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1751">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1751">1751</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Non-trivial operations vs non-trivial initialization</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1752">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1752">1752</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Right-recursion in <I>mem-initializer-list</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1753">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1753">1753</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><I>decltype-specifier</I> in <I>nested-name-specifier</I> of destructor</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10349,19 +10349,19 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1756">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1756">1756</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Direct-list-initialization of a non-class object</td>
     <td class="full" align="center">Clang 3.7</td>
   </tr>
   <tr id="1757">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1757">1757</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Const integral subobjects</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1758">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1758">1758</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Explicit conversion in copy/move list initialization</td>
     <td class="full" align="center">Clang 3.7</td>
   </tr>
@@ -10409,7 +10409,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1766">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1766">1766</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Values outside the range of the values of an enumeration</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10457,7 +10457,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1774">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1774">1774</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Discrepancy between subobject destruction and stack unwinding</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10469,13 +10469,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1776">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1776">1776</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Replacement of class objects containing reference members</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1777">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1777">1777</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Empty pack expansion in <I>dynamic-exception-specification</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10487,13 +10487,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1779">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1779">1779</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Type dependency of <TT>__func__</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1780">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1780">1780</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Explicit instantiation/specialization of generic lambda <TT>operator()</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10505,7 +10505,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1782">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1782">1782</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Form of initialization for <TT>nullptr_t</TT> to <TT>bool</TT> conversion</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10541,7 +10541,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1788">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1788">1788</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Sized deallocation of array of non-class type</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10559,7 +10559,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1791">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1791">1791</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Incorrect restrictions on <I>cv-qualifier-seq</I> and <I>ref-qualifier</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10571,7 +10571,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1793">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1793">1793</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><TT>thread_local</TT> in explicit specializations</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10583,19 +10583,19 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1795">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1795">1795</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Disambiguating <I>original-namespace-definition</I> and <I>extension-namespace-definition</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1796">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1796">1796</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Is all-bits-zero for null characters a meaningful requirement?</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1797">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1797">1797</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Are all bit patterns of <TT>unsigned char</TT> distinct numbers?</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10607,13 +10607,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1799">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1799">1799</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><TT>mutable</TT> and non-explicit const qualification</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1800">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1800">1800</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Pointer to member of nested anonymous union</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10625,7 +10625,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1802">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1802">1802</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><TT>char16_t</TT> string literals and surrogate pairs</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10637,25 +10637,25 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1804">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1804">1804</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Partial specialization and friendship</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1805">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1805">1805</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Conversions of array operands in <I>conditional-expression</I>s</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1806">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1806">1806</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Virtual bases and move-assignment</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1807">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1807">1807</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Order of destruction of array elements after an exception</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10667,19 +10667,19 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1809">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1809">1809</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Narrowing and template argument deduction</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1810">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1810">1810</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Invalid <I>ud-suffix</I>es</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1811">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1811">1811</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Lookup of deallocation function in a virtual destructor definition</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10691,25 +10691,25 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1813">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1813">1813</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Direct vs indirect bases in standard-layout classes</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1814">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1814">1814</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Default arguments in <I>lambda-expression</I>s</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1815">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1815">1815</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Lifetime extension in aggregate initialization</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1816">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1816">1816</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Unclear specification of bit-field values</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10727,7 +10727,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1819">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1819">1819</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Acceptable scopes for definition of partial specialization</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10751,19 +10751,19 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1823">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1823">1823</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>String literal uniqueness in inline functions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1824">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1824">1824</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Completeness of return type vs point of instantiation</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1825">
-    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1825">1825</a></td>
-    <td>ready</td>
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1825">1825</a></td>
+    <td>DR</td>
     <td>Partial ordering between variadic and non-variadic function templates</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10793,7 +10793,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1830">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1830">1830</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Repeated specifiers</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10805,7 +10805,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1832">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1832">1832</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Casting to incomplete enumeration</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10817,7 +10817,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1834">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1834">1834</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Constant initialization binding a reference to an xvalue</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10841,7 +10841,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1838">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1838">1838</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Definition via <I>unqualified-id</I> and <I>using-declaration</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10871,7 +10871,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1843">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1843">1843</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Bit-field in conditional operator with <TT>throw</TT> operand</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10889,19 +10889,19 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1846">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1846">1846</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Declaring explicitly-defaulted implicitly-deleted functions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1847">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1847">1847</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Clarifying compatibility during partial ordering</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1848">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1848">1848</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Parenthesized constructor and destructor declarators</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10913,19 +10913,19 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1850">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1850">1850</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Differences between definition context and point of instantiation</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1851">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1851">1851</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><TT>decltype(auto)</TT> in <I>new-expression</I>s</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1852">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1852">1852</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Wording issues regarding <TT>decltype(auto)</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10961,7 +10961,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1858">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1858">1858</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Comparing pointers to union members</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10971,15 +10971,15 @@ and <I>POD class</I></td>
     <td>UTF-16 in <TT>char16_t</TT> string literals</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="1860">
+  <tr id="1860">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1860">1860</a></td>
-    <td>review</td>
+    <td>ready</td>
     <td>What is a &#8220;direct member?&#8221;</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1861">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1861">1861</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Values of a bit-field</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -10991,7 +10991,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1863">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1863">1863</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Requirements on thrown object type to support <TT>std::current_exception()</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11003,13 +11003,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1865">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1865">1865</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Pointer arithmetic and multi-level qualification conversions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1866">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1866">1866</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Initializing variant members with non-trivial destructors</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11033,7 +11033,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1870">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1870">1870</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Contradictory wording about definitions vs explicit specialization/instantiation</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11045,25 +11045,25 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1872">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1872">1872</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Instantiations of <TT>constexpr</TT> templates that cannot appear in constant expressions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1873">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1873">1873</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Protected member access from derived class friends</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1874">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1874">1874</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Type vs non-type template parameters with <TT>class</TT> keyword</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1875">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1875">1875</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Reordering declarations in class scope</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11075,13 +11075,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1877">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1877">1877</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Return type deduction from <TT>return</TT> with no operand</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1878">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1878">1878</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><TT>operator auto</TT> template</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11099,13 +11099,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1881">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1881">1881</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Standard-layout classes and unnamed bit-fields</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1882">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1882">1882</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Reserved names without library use</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11123,25 +11123,25 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1885">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1885">1885</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Return value of a function is underspecified</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1886">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1886">1886</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Language linkage for <TT>main()</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1887">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1887">1887</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Problems with <TT>::</TT> as <I>nested-name-specifier</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1888">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1888">1888</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Implicitly-declared default constructors and <TT>explicit</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11159,13 +11159,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1891">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1891">1891</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Move constructor/assignment for closure class</td>
     <td class="svn" align="center">Clang 4</td>
   </tr>
   <tr id="1892">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1892">1892</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Use of <TT>auto</TT> in function type</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11183,7 +11183,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1895">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1895">1895</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Deleted conversions in conditional operator operands</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11207,7 +11207,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1899">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1899">1899</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Value-dependent constant expressions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11225,13 +11225,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1902">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1902">1902</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>What makes a conversion &#8220;otherwise ill-formed&#8221;?</td>
     <td class="full" align="center">Clang 3.7</td>
   </tr>
   <tr id="1903">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1903">1903</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>What declarations are introduced by a non-member <I>using-declaration</I>?</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11267,7 +11267,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1909">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1909">1909</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Member class template with the same name as the class</td>
     <td class="full" align="center">Yes</td>
   </tr>
@@ -11279,7 +11279,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1911">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1911">1911</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><TT>constexpr</TT> constructor with non-literal base class</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11309,7 +11309,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1916">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1916">1916</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>&#8220;Same cv-unqualified type&#8221;</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11333,7 +11333,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1920">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1920">1920</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Qualification mismatch in <I>pseudo-destructor-name</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11345,7 +11345,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1922">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1922">1922</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Injected class template names and default arguments</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11357,19 +11357,19 @@ and <I>POD class</I></td>
   </tr>
   <tr class="open" id="1924">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1924">1924</a></td>
-    <td>drafting</td>
+    <td>review</td>
     <td>Definition of &#8220;literal&#8221; and kinds of literals</td>
     <td align="center">Not resolved</td>
   </tr>
   <tr id="1925">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1925">1925</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Bit-field prvalues</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1926">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1926">1926</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Potential results of subscript operator</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11387,13 +11387,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1929">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1929">1929</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><TT>template</TT> keyword following namespace <I>nested-name-specifier</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1930">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1930">1930</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td><I>init-declarator-list</I> vs <I>member-declarator-list</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11405,7 +11405,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1932">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1932">1932</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Bit-field results of conditional operators</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11453,19 +11453,19 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1940">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1940">1940</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><TT>static_assert</TT> in anonymous unions</td>
     <td class="full" align="center">Yes</td>
   </tr>
   <tr id="1941">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1941">1941</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>SFINAE and inherited constructor default arguments</td>
     <td class="full" align="center">Clang 3.9</td>
   </tr>
   <tr id="1942">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1942">1942</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Incorrect reference to <I>trailing-return-type</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11489,7 +11489,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1946">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1946">1946</a></td>
-    <td>WP</td>
+    <td>CD4</td>
     <td><I>exception-specification</I>s vs pointer dereference</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11507,7 +11507,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1949">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1949">1949</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>&#8220;sequenced after&#8221; instead of &#8220;sequenced before&#8221;</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11519,13 +11519,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1951">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1951">1951</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Cv-qualification and literal types</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1952">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1952">1952</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Constant expressions and library undefined behavior</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11543,13 +11543,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1955">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1955">1955</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td><TT>#elif</TT> with invalid controlling expression</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1956">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1956">1956</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Reuse of storage of automatic variables</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11561,13 +11561,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1958">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1958">1958</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><TT>decltype(auto)</TT> with parenthesized initializer</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1959">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1959">1959</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Inadvertently inherited copy constructor</td>
     <td class="full" align="center">Clang 3.9</td>
   </tr>
@@ -11578,8 +11578,8 @@ and <I>POD class</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1961">
-    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1961">1961</a></td>
-    <td>ready</td>
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1961">1961</a></td>
+    <td>DR</td>
     <td>Potentially-concurrent actions within a signal handler</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11591,7 +11591,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1963">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1963">1963</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Implementation-defined identifier characters</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11609,13 +11609,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1966">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1966">1966</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Colon following enumeration <I>elaborated-type-specifier</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1967">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1967">1967</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Temporary lifetime and move-elision</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11639,7 +11639,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1971">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1971">1971</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Unclear disambiguation of destructor and <TT>operator~</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11663,7 +11663,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1975">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1975">1975</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Permissible declarations for <I>exception-specification</I>s</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11681,7 +11681,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1978">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1978">1978</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Redundant description of explicit constructor use</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11699,7 +11699,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1981">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1981">1981</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Implicit contextual conversions and <TT>explicit</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11741,7 +11741,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1988">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1988">1988</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Ambiguity between dependent and non-dependent bases in implicit member access</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11753,19 +11753,19 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1990">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1990">1990</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Ambiguity due to optional <I>decl-specifier-seq</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="1991">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1991">1991</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Inheriting constructors vs default arguments</td>
     <td class="full" align="center">Clang 3.9</td>
   </tr>
   <tr id="1992">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1992">1992</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td><TT>new (std::nothrow) int[N]</TT> can throw</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11783,7 +11783,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1995">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1995">1995</a></td>
-    <td>WP</td>
+    <td>CD4</td>
     <td><I>exception-specification</I>s and non-type template parameters</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11807,19 +11807,19 @@ and <I>POD class</I></td>
   </tr>
   <tr id="1999">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1999">1999</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Representation of source characters as universal-character-names</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2000">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2000">2000</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td><I>header-name</I> outside <TT>#include</TT> directive</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2001">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2001">2001</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td><I>non-directive</I> is underspecified</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11837,7 +11837,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2004">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2004">2004</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Unions with mutable members in constant expressions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11849,7 +11849,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2006">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2006">2006</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Cv-qualified <TT>void</TT> types</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11861,7 +11861,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2008">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2008">2008</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Default <I>template-argument</I>s underspecified</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11873,7 +11873,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2010">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2010">2010</a></td>
-    <td>WP</td>
+    <td>CD4</td>
     <td><I>exception-specification</I>s and conversion operators</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11885,7 +11885,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2012">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2012">2012</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Lifetime of references</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11903,19 +11903,19 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2015">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2015">2015</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>odr-use of deleted virtual functions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2016">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2016">2016</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Confusing wording in description of conversion function</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2017">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2017">2017</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Flowing off end is not equivalent to no-expression return</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11927,7 +11927,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2019">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2019">2019</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Member references omitted from description of storage duration</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11945,7 +11945,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2022">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2022">2022</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Copy elision in constant expressions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11957,7 +11957,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2024">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2024">2024</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Dependent types and unexpanded parameter packs</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11969,13 +11969,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2026">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2026">2026</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Zero-initialization and <TT>constexpr</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2027">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2027">2027</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Unclear requirements for multiple <TT>alignas</TT> specifiers</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -11999,19 +11999,19 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2031">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2031">2031</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Missing incompatibility for <TT>&amp;&amp;</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2032">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2032">2032</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Default <I>template-argument</I>s of variable templates</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2033">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2033">2033</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Redundant restriction on partial specialization argument</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12041,25 +12041,25 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2038">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2038">2038</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Document C++14 incompatibility of new braced deduction rule</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2039">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2039">2039</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Constant conversions to <TT>bool</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2040">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2040">2040</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td><I>trailing-return-type</I> no longer ambiguous</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2041">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2041">2041</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Namespace for explicit class template specialization</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12077,7 +12077,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2044">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2044">2044</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td><TT>decltype(auto)</TT> and <TT>void</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12095,7 +12095,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2047">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2047">2047</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Coordinating &#8220;throws anything&#8221; specifications</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12125,7 +12125,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2052">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2052">2052</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Template argument deduction vs overloaded operators</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12179,7 +12179,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2061">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2061">2061</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Inline namespace after simplifications</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12191,13 +12191,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2063">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2063">2063</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Type/nontype hiding in class scope</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2064">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2064">2064</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Conflicting specifications for dependent <I>decltype-specifier</I>s</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12209,7 +12209,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2066">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2066">2066</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Does type-dependent imply value-dependent?</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12221,13 +12221,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2068">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2068">2068</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>When can/must a defaulted virtual destructor be defined?</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2069">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2069">2069</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Do destructors have names?</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12239,7 +12239,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2071">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2071">2071</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td><TT>typedef</TT> with no declarator</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12263,13 +12263,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2075">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2075">2075</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Passing short initializer lists to array reference parameters</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2076">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2076">2076</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>List-initialization of arguments for constructor parameters</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12287,7 +12287,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2079">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2079">2079</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td><TT>[[</TT> appearing in a <I>balanced-token-seq</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12305,7 +12305,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2082">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2082">2082</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Referring to parameters in unevaluated operands of default arguments</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12317,13 +12317,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2084">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2084">2084</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>NSDMIs and deleted union default constructors</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2085">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2085">2085</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Invalid example of adding special member function via default argument</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12359,7 +12359,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2091">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2091">2091</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Deducing reference non-type template arguments</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12371,25 +12371,25 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2093">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2093">2093</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Qualification conversion for pointer-to-member handler matching</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2094">
-    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2094">2094</a></td>
-    <td>tentatively ready</td>
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2094">2094</a></td>
+    <td>DR</td>
     <td>Trivial copy/move constructor for class with volatile member</td>
-    <td class="none" align="center">Unknown</td>
+    <td class="full" align="center">Clang 5.0</td>
   </tr>
   <tr id="2095">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2095">2095</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Capturing rvalue references to functions by copy</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2096">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2096">2096</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Constraints on literal unions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12401,25 +12401,25 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2098">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2098">2098</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Is <TT>uncaught_exceptions()</TT> per-thread?</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2099">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2099">2099</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Inferring the bound of an array static data member</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2100">
-    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2100">2100</a></td>
-    <td>tentatively ready</td>
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2100">2100</a></td>
+    <td>DR</td>
     <td>Value-dependent address of static data member of class template</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2101">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2101">2101</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Incorrect description of type- and value-dependence</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12437,7 +12437,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2104">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2104">2104</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Internal-linkage <TT>constexpr</TT> references and ODR requirements</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12449,13 +12449,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2106">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2106">2106</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Unclear restrictions on use of function-type template arguments</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2107">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2107">2107</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Lifetime of temporaries for default arguments in array copying</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12467,7 +12467,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2109">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2109">2109</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Value dependence underspecified</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12491,7 +12491,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2113">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2113">2113</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Incompete specification of types for declarators</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12533,7 +12533,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2120">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2120">2120</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Array as first non-static data member in standard-layout class</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12545,7 +12545,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2122">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2122">2122</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Glvalues of <TT>void</TT> type</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12557,7 +12557,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2124">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2124">2124</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Signature of constructor template</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12587,13 +12587,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2129">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2129">2129</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Non-object prvalues and constant expressions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2130">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2130">2130</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Over-aligned types in <I>new-expression</I>s</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12635,7 +12635,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2137">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2137">2137</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>List-initialization from object of same type</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12653,13 +12653,13 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2140">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2140">2140</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Lvalue-to-rvalue conversion of <TT>std::nullptr_t</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2141">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2141">2141</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Ambiguity in <I>new-expression</I> with <I>elaborated-type-specifier</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12670,8 +12670,8 @@ and <I>POD class</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2143">
-    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2143">2143</a></td>
-    <td>ready</td>
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2143">2143</a></td>
+    <td>DR</td>
     <td>Value-dependency via injected-class-name</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12683,19 +12683,19 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2145">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2145">2145</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Parenthesized declarator in function definition</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2146">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2146">2146</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Scalar object vs memory location in definition of &#8220;unsequenced&#8221;</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2147">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2147">2147</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Initializer-list arguments and pack deduction</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12731,31 +12731,31 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2153">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2153">2153</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td><I>pure-specifier</I> in friend declaration</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2154">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2154">2154</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Ambiguity of <I>pure-specifier</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2155">
-    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2155">2155</a></td>
-    <td>ready</td>
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2155">2155</a></td>
+    <td>DR</td>
     <td>Defining classes and enumerations via <I>using-declaration</I>s</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2156">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2156">2156</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Definition of enumeration declared by <I>using-declaration</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2157">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2157">2157</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Further disambiguation of enumeration <I>elaborated-type-specifier</I></td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12791,7 +12791,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2163">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2163">2163</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Labels in <TT>constexpr</TT> functions</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12815,7 +12815,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2167">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2167">2167</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Non-member references with lifetimes within the current evaluation</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12839,7 +12839,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2171">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2171">2171</a></td>
-    <td>DRWP</td>
+    <td>CD4</td>
     <td>Triviality of copy constructor with less-qualified parameter</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12855,21 +12855,21 @@ and <I>POD class</I></td>
     <td>Partial specialization with non-deduced contexts</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="2174">
+  <tr id="2174">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2174">2174</a></td>
-    <td>drafting</td>
+    <td>ready</td>
     <td>Unclear rules for friend definitions in templates</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2175">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2175">2175</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Ambiguity with attribute in conversion operator declaration</td>
     <td class="none" align="center">Unknown</td>
   </tr>
   <tr id="2176">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2176">2176</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Destroying the returned object when a destructor throws</td>
     <td class="none" align="center">Unknown</td>
   </tr>
@@ -12893,7 +12893,7 @@ and <I>POD class</I></td>
   </tr>
   <tr id="2180">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2180">2180</a></td>
-    <td>DR</td>
+    <td>CD4</td>
     <td>Virtual bases in destructors and defaulted assignment operators</td>
     <td class="full" align="center">Yes</td>
   </tr>
@@ -12915,11 +12915,11 @@ and <I>POD class</I></td>
     <td>Problems in description of potential exceptions</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="2184">
-    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2184">2184</a></td>
-    <td>review</td>
+  <tr id="2184">
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2184">2184</a></td>
+    <td>CD4</td>
     <td>Missing C compatibility entry for decrement of <TT>bool</TT></td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr class="open" id="2185">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2185">2185</a></td>
@@ -12957,11 +12957,11 @@ and <I>POD class</I></td>
     <td>Insufficient specification of <TT>__has_include</TT></td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="2191">
+  <tr id="2191">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2191">2191</a></td>
-    <td>open</td>
+    <td>ready</td>
     <td>Incorrect result for <TT>noexcept(typeid(v))</TT></td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr class="open" id="2192">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2192">2192</a></td>
@@ -12975,11 +12975,11 @@ and <I>POD class</I></td>
     <td><TT>numeric_limits&lt;int&gt;::radix</TT> and <TT>digits</TT></td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="2194">
+  <tr id="2194">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2194">2194</a></td>
-    <td>open</td>
+    <td>tentatively ready</td>
     <td>Impossible case in list initialization</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr class="open" id="2195">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2195">2195</a></td>
@@ -12999,11 +12999,11 @@ and <I>POD class</I></td>
     <td>Overload resolution and deleted special member functions</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="2198">
+  <tr id="2198">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2198">2198</a></td>
-    <td>open</td>
+    <td>tentatively ready</td>
     <td>Linkage of enumerators</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr class="open" id="2199">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2199">2199</a></td>
@@ -13017,11 +13017,11 @@ and <I>POD class</I></td>
     <td>Conversions in template argument deduction</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="2201">
+  <tr id="2201">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2201">2201</a></td>
-    <td>open</td>
+    <td>tentatively ready</td>
     <td>Cv-qualification of array types</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr class="open" id="2202">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2202">2202</a></td>
@@ -13041,17 +13041,17 @@ and <I>POD class</I></td>
     <td>Naming delegated constructors</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="2205">
+  <tr id="2205">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2205">2205</a></td>
-    <td>open</td>
+    <td>ready</td>
     <td>Restrictions on use of <TT>alignas</TT></td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
-  <tr class="open" id="2206">
+  <tr id="2206">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2206">2206</a></td>
-    <td>open</td>
+    <td>tentatively ready</td>
     <td>Composite type of object and function pointers</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr class="open" id="2207">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2207">2207</a></td>
@@ -13095,11 +13095,11 @@ and <I>POD class</I></td>
     <td>Forward declaration of partial specializations</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="2214">
+  <tr id="2214">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2214">2214</a></td>
-    <td>open</td>
+    <td>tentatively ready</td>
     <td>Missing requirement on representation of integer values</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr class="open" id="2215">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2215">2215</a></td>
@@ -13119,11 +13119,11 @@ and <I>POD class</I></td>
     <td><TT>constexpr</TT> constructors for non-literal types</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="2218">
+  <tr id="2218">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2218">2218</a></td>
-    <td>open</td>
+    <td>ready</td>
     <td>Ambiguity and namespace aliases</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr class="open" id="2219">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2219">2219</a></td>
@@ -13131,11 +13131,11 @@ and <I>POD class</I></td>
     <td>Dynamically-unreachable handlers</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="2220">
+  <tr id="2220">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2220">2220</a></td>
-    <td>open</td>
+    <td>tentatively ready</td>
     <td>Hiding index variable in range-based <TT>for</TT></td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr class="open" id="2221">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2221">2221</a></td>
@@ -13155,11 +13155,11 @@ and <I>POD class</I></td>
     <td>Multiple <TT>alignas</TT> specifiers</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="2224">
+  <tr id="2224">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2224">2224</a></td>
-    <td>open</td>
+    <td>tentatively ready</td>
     <td>Member subobjects and base-class casts</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr class="open" id="2225">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2225">2225</a></td>
@@ -13301,7 +13301,7 @@ and <I>POD class</I></td>
   </tr>
   <tr class="open" id="2248">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2248">2248</a></td>
-    <td>open</td>
+    <td>review</td>
     <td>Problems with sized delete</td>
     <td align="center">Not resolved</td>
   </tr>
@@ -13319,7 +13319,7 @@ and <I>POD class</I></td>
   </tr>
   <tr class="open" id="2251">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2251">2251</a></td>
-    <td>open</td>
+    <td>review</td>
     <td>Unreachable enumeration list-initialization</td>
     <td align="center">Not resolved</td>
   </tr>
@@ -13365,11 +13365,11 @@ and <I>POD class</I></td>
     <td>Storage deallocation during period of destruction</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="2259">
+  <tr id="2259">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2259">2259</a></td>
-    <td>open</td>
+    <td>tentatively ready</td>
     <td>Unclear context describing ambiguity</td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr class="open" id="2260">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2260">2260</a></td>
@@ -13383,11 +13383,11 @@ and <I>POD class</I></td>
     <td>Explicit instantiation of in-class <TT>friend</TT> definition</td>
     <td align="center">Not resolved</td>
   </tr>
-  <tr class="open" id="2262">
+  <tr id="2262">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2262">2262</a></td>
-    <td>open</td>
+    <td>tentatively ready</td>
     <td>Attributes for <I>asm-definition</I></td>
-    <td align="center">Not resolved</td>
+    <td class="none" align="center">Unknown</td>
   </tr>
   <tr class="open" id="2263">
     <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2263">2263</a></td>
@@ -13438,11 +13438,47 @@ and <I>POD class</I></td>
     <td align="center">Not resolved</td>
   </tr>
   <tr id="2271">
-    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2271">2271</a></td>
-    <td>ready</td>
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#2271">2271</a></td>
+    <td>DR</td>
     <td>Aliasing <TT>this</TT></td>
     <td class="none" align="center">Unknown</td>
   </tr>
+  <tr class="open" id="2272">
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2272">2272</a></td>
+    <td>open</td>
+    <td>Implicit initialization of aggregate members of reference type</td>
+    <td align="center">Not resolved</td>
+  </tr>
+  <tr class="open" id="2273">
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2273">2273</a></td>
+    <td>open</td>
+    <td>Inheriting constructors vs implicit default constructor</td>
+    <td align="center">Not resolved</td>
+  </tr>
+  <tr class="open" id="2274">
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2274">2274</a></td>
+    <td>open</td>
+    <td>Generic lambda capture vs constexpr if</td>
+    <td align="center">Not resolved</td>
+  </tr>
+  <tr class="open" id="2275">
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2275">2275</a></td>
+    <td>open</td>
+    <td>Type-dependence of function template</td>
+    <td align="center">Not resolved</td>
+  </tr>
+  <tr class="open" id="2276">
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2276">2276</a></td>
+    <td>open</td>
+    <td>Dependent <TT>noexcept</TT> and function type-dependence</td>
+    <td align="center">Not resolved</td>
+  </tr>
+  <tr class="open" id="2277">
+    <td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#2277">2277</a></td>
+    <td>open</td>
+    <td>Ambiguity inheriting constructors with default arguments</td>
+    <td align="center">Not resolved</td>
+  </tr>
 </table>
 
 </div>

From 7582e3938bb9fb3e4664efdfb2313df29f27b70b Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Tue, 16 May 2017 19:47:31 +0000
Subject: [PATCH 2/9] Vendor import of libc++ trunk r303197:
 https://llvm.org/svn/llvm-project/libcxx/trunk@303197

---
 ...eyor-reqs.cmd => appveyor-reqs-install.cmd |  12 +-
 appveyor.yml                                  |  22 +-
 docs/CMakeLists.txt                           |   4 +-
 docs/TestingLibcxx.rst                        |   2 +-
 include/__bsd_locale_fallbacks.h              |  31 +-
 include/__config                              |  12 +-
 include/__locale                              |  19 +
 include/__mutex_base                          |   1 +
 include/__threading_support                   |   1 +
 include/__undef_min_max                       |   4 +-
 include/algorithm                             | 127 +++-
 include/ctype.h                               |   9 -
 include/experimental/numeric                  |  13 +-
 include/ext/hash_map                          |   2 +-
 include/ext/hash_set                          |   2 +-
 include/limits                                |   4 +-
 include/locale                                |   3 -
 include/memory                                |  10 +-
 include/numeric                               |  11 +-
 include/stdio.h                               |   5 +-
 include/stdlib.h                              |   4 -
 include/string_view                           |   8 -
 .../{limits_win32.h => limits_msvc_win32.h}   |  27 +-
 include/support/win32/locale_mgmt_win32.h     |  33 -
 include/support/win32/locale_win32.h          |  32 +-
 include/support/win32/support.h               | 177 -----
 include/variant                               |  25 +-
 include/wchar.h                               |   9 +-
 src/include/atomic_support.h                  |   2 +-
 src/locale.cpp                                |  46 +-
 src/string.cpp                                |   5 +-
 .../runtime/exception_pointer_msvc.ipp        |   1 +
 src/support/win32/locale_win32.cpp            |  26 +-
 src/thread.cpp                                |   2 +-
 .../fopen.fail.cpp                            |   2 +
 .../rename.fail.cpp                           |   2 +
 ...otify_from_pthread_created_thread.pass.cpp |   0
 .../thread.thread.this/sleep_for.pass.cpp     |   1 -
 .../function.objects/refwrap/binary.pass.cpp  |   0
 .../function.objects/refwrap/unary.pass.cpp   |   0
 .../template.bitset/includes.pass.cpp         |  32 +
 .../tuple/tuple.tuple/empty_member.pass.cpp   |   0
 .../meta.type.synop/meta.unary.prop.pass.cpp  |   6 +
 .../lit.local.cfg                             |   2 -
 .../delete_align_val_t_replace.pass.cpp       |   2 +-
 .../new.delete.array/new_align_val_t.pass.cpp |   2 +-
 .../new_align_val_t_nothrow.pass.cpp          |   2 +-
 .../new_align_val_t_nothrow_replace.pass.cpp  |   2 +-
 .../new_align_val_t_replace.pass.cpp          |   3 +-
 .../delete_align_val_t_replace.pass.cpp       |   2 +-
 .../new_align_val_t.pass.cpp                  |   2 +-
 .../new_align_val_t_nothrow.pass.cpp          |   2 +-
 .../new_align_val_t_nothrow_replace.pass.cpp  |   3 +-
 .../new_align_val_t_replace.pass.cpp          |   3 +-
 .../support.types/byteops/and.assign.pass.cpp |   9 +-
 .../support.types/byteops/and.pass.cpp        |   9 +-
 .../byteops/enum_direct_init.pass.cpp         |  21 +
 .../byteops/lshift.assign.pass.cpp            |   7 +-
 .../support.types/byteops/lshift.fail.cpp     |   5 +-
 .../support.types/byteops/lshift.pass.cpp     |   7 +-
 .../support.types/byteops/not.pass.cpp        |   9 +-
 .../support.types/byteops/or.assign.pass.cpp  |   9 +-
 .../support.types/byteops/or.pass.cpp         |   9 +-
 .../byteops/rshift.assign.pass.cpp            |   7 +-
 .../support.types/byteops/rshift.fail.cpp     |   5 +-
 .../support.types/byteops/rshift.pass.cpp     |   7 +-
 .../support.types/byteops/to_integer.fail.cpp |   5 +-
 .../support.types/byteops/to_integer.pass.cpp |   7 +-
 .../support.types/byteops/xor.assign.pass.cpp |   9 +-
 .../support.types/byteops/xor.pass.cpp        |   9 +-
 .../locale.stdcvt/codecvt_utf16_out.pass.cpp  | 626 ++++++++--------
 .../locale.stdcvt/codecvt_utf8_out.pass.cpp   | 708 +++++++-----------
 .../codecvt_utf8_utf16_in.pass.cpp            | 546 ++++++--------
 .../codecvt_utf8_utf16_out.pass.cpp           | 648 +++++++---------
 .../conversions.string/converted.pass.cpp     |  39 +-
 .../conversions.string/from_bytes.pass.cpp    |  65 +-
 .../conversions.string/to_bytes.pass.cpp      |  65 +-
 .../numeric.ops/numeric.ops.gcd/gcd.pass.cpp  |  45 +-
 .../numeric.ops/numeric.ops.lcm/lcm.pass.cpp  |  49 +-
 .../re/re.traits/lookup_classname.pass.cpp    |  68 +-
 .../string.cons/T_size_size.pass.cpp          |  18 +-
 .../string.view.modifiers/clear.pass.cpp      |  67 --
 .../sleep_for_tested_elsewhere.pass.cpp       |  22 +
 .../allocator.adaptor.cnstr/allocs.pass.cpp   |   8 +-
 .../func.not_fn/not_fn.pass.cpp               |   2 +-
 .../func.wrap.func.con/copy_move.pass.cpp     |   3 +-
 .../func.wrap.func.targ/target.pass.cpp       |   4 +
 .../enable_shared_from_this.pass.cpp          |  10 +
 .../meta/meta.rel/is_convertible.pass.cpp     |   4 +-
 .../is_trivially_copyable.pass.cpp            |  12 +-
 .../template.bitset/includes.pass.cpp         |  35 +-
 .../tuple.tuple/tuple.cnstr/dtor.pass.cpp     |   4 +
 .../utility/pairs/pairs.pair/dtor.pass.cpp    |   4 +
 .../variant/variant.visit/visit.pass.cpp      |  10 +
 test/support/archetypes.hpp                   |   9 +
 test/support/archetypes.ipp                   |   4 +
 .../support/filesystem_dynamic_test_helper.py |   2 +-
 test/support/filesystem_test_helper.hpp       |   4 +-
 test/support/msvc_stdlib_force_include.hpp    |  30 +-
 .../c1xx_broken_za_ctor_check.pass.cpp        |  41 +
 test/support/test_macros.h                    |   8 +-
 test/support/test_workarounds.h               |   7 +
 utils/libcxx/test/config.py                   |  25 +-
 utils/libcxx/test/executor.py                 |  29 +-
 utils/libcxx/test/format.py                   |   2 +-
 www/cxx1z_status.html                         |  10 +-
 106 files changed, 1946 insertions(+), 2180 deletions(-)
 rename install-appveyor-reqs.cmd => appveyor-reqs-install.cmd (81%)
 rename include/support/win32/{limits_win32.h => limits_msvc_win32.h} (80%)
 delete mode 100644 include/support/win32/locale_mgmt_win32.h
 delete mode 100644 include/support/win32/support.h
 rename test/{std => libcxx}/input.output/file.streams/c.files/no.global.filesystem.namespace/fopen.fail.cpp (89%)
 rename test/{std => libcxx}/input.output/file.streams/c.files/no.global.filesystem.namespace/rename.fail.cpp (89%)
 rename test/{std => libcxx}/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp (100%)
 rename test/{std => libcxx}/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp (99%)
 rename test/{std => libcxx}/utilities/function.objects/refwrap/binary.pass.cpp (100%)
 rename test/{std => libcxx}/utilities/function.objects/refwrap/unary.pass.cpp (100%)
 create mode 100644 test/libcxx/utilities/template.bitset/includes.pass.cpp
 rename test/{std => libcxx}/utilities/tuple/tuple.tuple/empty_member.pass.cpp (100%)
 delete mode 100644 test/std/input.output/file.streams/c.files/no.global.filesystem.namespace/lit.local.cfg
 create mode 100644 test/std/language.support/support.types/byteops/enum_direct_init.pass.cpp
 delete mode 100644 test/std/strings/string.view/string.view.modifiers/clear.pass.cpp
 create mode 100644 test/std/thread/thread.threads/thread.thread.this/sleep_for_tested_elsewhere.pass.cpp
 create mode 100644 test/support/test.workarounds/c1xx_broken_za_ctor_check.pass.cpp

diff --git a/install-appveyor-reqs.cmd b/appveyor-reqs-install.cmd
similarity index 81%
rename from install-appveyor-reqs.cmd
rename to appveyor-reqs-install.cmd
index ebd72ca0a2da..3a2087af51fe 100644
--- a/install-appveyor-reqs.cmd
+++ b/appveyor-reqs-install.cmd
@@ -1,11 +1,12 @@
 @echo on
+
 if NOT EXIST C:\projects\deps (
   mkdir C:\projects\deps
 )
 cd C:\projects\deps
 
 ::###########################################################################
-:: Setup the path to Clang-cl
+:: Setup Compiler
 ::###########################################################################
 if NOT EXIST llvm-installer.exe (
   appveyor DownloadFile http://llvm.org/pre-releases/win-snapshots/LLVM-5.0.0-r301646-win32.exe -FileName llvm-installer.exe
@@ -13,8 +14,13 @@ if NOT EXIST llvm-installer.exe (
 if "%CLANG_VERSION%"=="ToT" (
     START /WAIT llvm-installer.exe /S /D=C:\"Program Files\LLVM"
 )
-@set PATH="C:\Program Files\LLVM\bin";%PATH%
-clang-cl -v
+if DEFINED CLANG_VERSION  @set PATH="C:\Program Files\LLVM\bin";%PATH%
+if DEFINED CLANG_VERSION  clang-cl -v
+
+if DEFINED MINGW_PATH rename "C:\Program Files\Git\usr\bin\sh.exe" "sh-ignored.exe"
+if DEFINED MINGW_PATH @set "PATH=%PATH:C:\Program Files (x86)\Git\bin=%"
+if DEFINED MINGW_PATH @set "PATH=%PATH%;%MINGW_PATH%"
+if DEFINED MINGW_PATH g++ -v
 
 ::###########################################################################
 :: Install a recent CMake
diff --git a/appveyor.yml b/appveyor.yml
index 1d9e88a87b15..be69a555d778 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -11,24 +11,35 @@ configuration:
 environment:
   matrix:
     - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
+      CMAKE_OPTIONS: -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe
       CLANG_VERSION: ToT
       MSVC_SETUP_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat
       MSVC_SETUP_ARG: x86
+      GENERATOR: Ninja
+      MAKE_PROGRAM: ninja
       APPVEYOR_SAVE_CACHE_ON_ERROR: true
     - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
+      CMAKE_OPTIONS: -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe
       CLANG_VERSION: 4
       MSVC_SETUP_PATH: C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat
       MSVC_SETUP_ARG: x86_amd64
+      GENERATOR: Ninja
+      MAKE_PROGRAM: ninja
+      APPVEYOR_SAVE_CACHE_ON_ERROR: true
+    - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
+      MINGW_PATH: C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin
+      GENERATOR: MinGW Makefiles
+      MAKE_PROGRAM: mingw32-make
       APPVEYOR_SAVE_CACHE_ON_ERROR: true
 
 install:
   ############################################################################
   # All external dependencies are installed in C:\projects\deps
   ############################################################################
-  - call "%APPVEYOR_BUILD_FOLDER%\\install-appveyor-reqs.cmd"
+  - call "%APPVEYOR_BUILD_FOLDER%\\appveyor-reqs-install.cmd"
 
 before_build:
-  - call "%MSVC_SETUP_PATH%" %MSVC_SETUP_ARG%
+  - if DEFINED MSVC_SETUP_PATH call "%MSVC_SETUP_PATH%" %MSVC_SETUP_ARG%
   - cd %APPVEYOR_BUILD_FOLDER%
 
 build_script:
@@ -39,8 +50,7 @@ build_script:
   #############################################################################
   # Configuration Step
   #############################################################################
-  - cmake -G Ninja %extra_cmake_flags%
-    -DCMAKE_C_COMPILER=clang-cl.exe -DCMAKE_CXX_COMPILER=clang-cl.exe
+  - cmake -G "%GENERATOR%" %CMAKE_OPTIONS%
     "-DCMAKE_BUILD_TYPE=%configuration%"
     "-DLLVM_PATH=C:\projects\deps\llvm" -DLIBCXX_ENABLE_EXPERIMENTAL_LIBRARY=OFF
     -DLLVM_LIT_ARGS="-sv --show-xfail --show-unsupported"
@@ -49,10 +59,10 @@ build_script:
   #############################################################################
   # Build Step
   #############################################################################
-  - ninja
+  - "%MAKE_PROGRAM%"
 
 test_script:
-  - ninja check-cxx
+  - "%MAKE_PROGRAM% check-cxx"
 
 on_failure:
   - appveyor PushArtifact CMakeFiles/CMakeOutput.log
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index f63ee00a5a02..d679761a5adb 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -1,9 +1,9 @@
 
 if (LLVM_ENABLE_SPHINX)
+  include(AddSphinxTarget)
   if (SPHINX_FOUND)
-    include(AddSphinxTarget)
     if (${SPHINX_OUTPUT_HTML})
       add_sphinx_target(html libcxx)
     endif()
   endif()
-endif()
\ No newline at end of file
+endif()
diff --git a/docs/TestingLibcxx.rst b/docs/TestingLibcxx.rst
index e4292764c4d0..5c48ebe61ddf 100644
--- a/docs/TestingLibcxx.rst
+++ b/docs/TestingLibcxx.rst
@@ -119,7 +119,7 @@ configuration. Passing the option on the command line will override the default.
 .. option:: libcxx_site_config=<path/to/lit.site.cfg>
 
   Specify the site configuration to use when running the tests.  This option
-  overrides the enviroment variable LIBCXX_SITE_CONFIG.
+  overrides the environment variable LIBCXX_SITE_CONFIG.
 
 .. option:: cxx_headers=<path/to/headers>
 
diff --git a/include/__bsd_locale_fallbacks.h b/include/__bsd_locale_fallbacks.h
index cbc8ad226fd2..3425ce659b7e 100644
--- a/include/__bsd_locale_fallbacks.h
+++ b/include/__bsd_locale_fallbacks.h
@@ -19,27 +19,24 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-typedef _VSTD::remove_pointer<locale_t>::type __use_locale_struct;
-typedef _VSTD::unique_ptr<__use_locale_struct, decltype(&uselocale)> __locale_raii;
-
 inline _LIBCPP_ALWAYS_INLINE
 decltype(MB_CUR_MAX) __libcpp_mb_cur_max_l(locale_t __l)
 {
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     return MB_CUR_MAX;
 }
 
 inline _LIBCPP_ALWAYS_INLINE
 wint_t __libcpp_btowc_l(int __c, locale_t __l)
 {
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     return btowc(__c);
 }
 
 inline _LIBCPP_ALWAYS_INLINE
 int __libcpp_wctob_l(wint_t __c, locale_t __l)
 {
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     return wctob(__c);
 }
 
@@ -47,14 +44,14 @@ inline _LIBCPP_ALWAYS_INLINE
 size_t __libcpp_wcsnrtombs_l(char *__dest, const wchar_t **__src, size_t __nwc,
                          size_t __len, mbstate_t *__ps, locale_t __l)
 {
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     return wcsnrtombs(__dest, __src, __nwc, __len, __ps);
 }
 
 inline _LIBCPP_ALWAYS_INLINE
 size_t __libcpp_wcrtomb_l(char *__s, wchar_t __wc, mbstate_t *__ps, locale_t __l)
 {
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     return wcrtomb(__s, __wc, __ps);
 }
 
@@ -62,7 +59,7 @@ inline _LIBCPP_ALWAYS_INLINE
 size_t __libcpp_mbsnrtowcs_l(wchar_t * __dest, const char **__src, size_t __nms,
                       size_t __len, mbstate_t *__ps, locale_t __l)
 {
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     return mbsnrtowcs(__dest, __src, __nms, __len, __ps);
 }
 
@@ -70,28 +67,28 @@ inline _LIBCPP_ALWAYS_INLINE
 size_t __libcpp_mbrtowc_l(wchar_t *__pwc, const char *__s, size_t __n,
                    mbstate_t *__ps, locale_t __l)
 {
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     return mbrtowc(__pwc, __s, __n, __ps);
 }
 
 inline _LIBCPP_ALWAYS_INLINE
 int __libcpp_mbtowc_l(wchar_t *__pwc, const char *__pmb, size_t __max, locale_t __l)
 {
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     return mbtowc(__pwc, __pmb, __max);
 }
 
 inline _LIBCPP_ALWAYS_INLINE
 size_t __libcpp_mbrlen_l(const char *__s, size_t __n, mbstate_t *__ps, locale_t __l)
 {
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     return mbrlen(__s, __n, __ps);
 }
 
 inline _LIBCPP_ALWAYS_INLINE
 lconv *__libcpp_localeconv_l(locale_t __l)
 {
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     return localeconv();
 }
 
@@ -99,7 +96,7 @@ inline _LIBCPP_ALWAYS_INLINE
 size_t __libcpp_mbsrtowcs_l(wchar_t *__dest, const char **__src, size_t __len,
                      mbstate_t *__ps, locale_t __l)
 {
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     return mbsrtowcs(__dest, __src, __len, __ps);
 }
 
@@ -107,7 +104,7 @@ inline
 int __libcpp_snprintf_l(char *__s, size_t __n, locale_t __l, const char *__format, ...) {
     va_list __va;
     va_start(__va, __format);
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     int __res = vsnprintf(__s, __n, __format, __va);
     va_end(__va);
     return __res;
@@ -117,7 +114,7 @@ inline
 int __libcpp_asprintf_l(char **__s, locale_t __l, const char *__format, ...) {
     va_list __va;
     va_start(__va, __format);
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     int __res = vasprintf(__s, __format, __va);
     va_end(__va);
     return __res;
@@ -127,7 +124,7 @@ inline
 int __libcpp_sscanf_l(const char *__s, locale_t __l, const char *__format, ...) {
     va_list __va;
     va_start(__va, __format);
-    __locale_raii __current( uselocale(__l), uselocale );
+    __libcpp_locale_guard __current(__l);
     int __res = vsscanf(__s, __format, __va);
     va_end(__va);
     return __res;
diff --git a/include/__config b/include/__config
index 2a2907494b2b..4ad700e234c2 100644
--- a/include/__config
+++ b/include/__config
@@ -129,6 +129,12 @@
 
 #define __has_keyword(__x) !(__is_identifier(__x))
 
+#ifdef __has_include
+#define __libcpp_has_include(__x) __has_include(__x)
+#else
+#define __libcpp_has_include(__x) 0
+#endif
+
 #if defined(__clang__)
 #define _LIBCPP_COMPILER_CLANG
 # ifndef __apple_build_version__
@@ -968,7 +974,7 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container(
 #  if defined(__GNUC__) && ((__GNUC__ >= 5) || (__GNUC__ == 4 && \
    (__GNUC_MINOR__ >= 3 || __GNUC_PATCHLEVEL__ >= 2))) && !defined(__GXX_RTTI)
 #    define _LIBCPP_NO_RTTI
-#  elif defined(_LIBCPP_MSVC) && !defined(_CPPRTTI)
+#  elif defined(_LIBCPP_COMPILER_MSVC) && !defined(_CPPRTTI)
 #    define _LIBCPP_NO_RTTI
 #  endif
 #endif
@@ -980,6 +986,7 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container(
 // Thread API
 #if !defined(_LIBCPP_HAS_NO_THREADS) && \
     !defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && \
+    !defined(_LIBCPP_HAS_THREAD_API_WIN32) && \
     !defined(_LIBCPP_HAS_THREAD_API_EXTERNAL)
 # if defined(__FreeBSD__) || \
     defined(__Fuchsia__) || \
@@ -987,7 +994,8 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container(
     defined(__linux__) || \
     defined(__APPLE__) || \
     defined(__CloudABI__) || \
-    defined(__sun__)
+    defined(__sun__) || \
+    (defined(__MINGW32__) && __libcpp_has_include(<pthread.h>))
 #   define _LIBCPP_HAS_THREAD_API_PTHREAD
 # elif defined(_LIBCPP_WIN32API)
 #  define _LIBCPP_HAS_THREAD_API_WIN32
diff --git a/include/__locale b/include/__locale
index 4184e7e03489..cf3ba23b9092 100644
--- a/include/__locale
+++ b/include/__locale
@@ -49,6 +49,25 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+#if !defined(_LIBCPP_LOCALE__L_EXTENSIONS) || defined(_LIBCPP_MSVCRT)
+struct __libcpp_locale_guard {
+  _LIBCPP_INLINE_VISIBILITY
+  __libcpp_locale_guard(locale_t& __loc) : __old_loc_(uselocale(__loc)) {}
+
+  _LIBCPP_INLINE_VISIBILITY
+  ~__libcpp_locale_guard() {
+    if (__old_loc_)
+      uselocale(__old_loc_);
+  }
+
+  locale_t __old_loc_;
+private:
+  __libcpp_locale_guard(__libcpp_locale_guard const&);
+  __libcpp_locale_guard& operator=(__libcpp_locale_guard const&);
+};
+#endif
+
+
 class _LIBCPP_TYPE_VIS locale;
 
 template <class _Facet>
diff --git a/include/__mutex_base b/include/__mutex_base
index a6d5e79c4c86..7f5e2ea2810e 100644
--- a/include/__mutex_base
+++ b/include/__mutex_base
@@ -15,6 +15,7 @@
 #include <chrono>
 #include <system_error>
 #include <__threading_support>
+#include <__undef_min_max>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
diff --git a/include/__threading_support b/include/__threading_support
index 080ebd256b8f..385fff32b350 100644
--- a/include/__threading_support
+++ b/include/__threading_support
@@ -30,6 +30,7 @@
 #include <Windows.h>
 #include <process.h>
 #include <fibersapi.h>
+#include <__undef_min_max>
 #endif
 
 #if defined(_LIBCPP_HAS_THREAD_LIBRARY_EXTERNAL) || \
diff --git a/include/__undef_min_max b/include/__undef_min_max
index f4ca091def45..71db3965e3cd 100644
--- a/include/__undef_min_max
+++ b/include/__undef_min_max
@@ -10,7 +10,7 @@
 
 #ifdef min
 #if !defined(_LIBCPP_DISABLE_MACRO_CONFLICT_WARNINGS)
-#if defined(_LIBCPP_MSVC)
+#if defined(_LIBCPP_WARNING)
 _LIBCPP_WARNING("macro min is incompatible with C++.  Try #define NOMINMAX "
                 "before any Windows header. #undefing min")
 #else
@@ -22,7 +22,7 @@ _LIBCPP_WARNING("macro min is incompatible with C++.  Try #define NOMINMAX "
 
 #ifdef max
 #if !defined(_LIBCPP_DISABLE_MACRO_CONFLICT_WARNINGS)
-#if defined(_LIBCPP_MSVC)
+#if defined(_LIBCPP_WARNING)
 _LIBCPP_WARNING("macro max is incompatible with C++.  Try #define NOMINMAX "
                 "before any Windows header. #undefing max")
 #else
diff --git a/include/algorithm b/include/algorithm
index c3517a11bb0d..08ca23ff6168 100644
--- a/include/algorithm
+++ b/include/algorithm
@@ -644,8 +644,8 @@ template <class BidirectionalIterator, class Compare>
 #if defined(__IBMCPP__)
 #include "support/ibm/support.h"
 #endif
-#if defined(_LIBCPP_MSVCRT) || defined(__MINGW32__)
-#include "support/win32/support.h"
+#if defined(_LIBCPP_COMPILER_MSVC)
+#include <intrin.h>
 #endif
 
 #include <__undef_min_max>
@@ -783,51 +783,132 @@ struct __debug_less
 
 // Precondition:  __x != 0
 inline _LIBCPP_INLINE_VISIBILITY
-unsigned
-__ctz(unsigned __x)
-{
+unsigned __ctz(unsigned __x) {
+#ifndef _LIBCPP_COMPILER_MSVC
     return static_cast<unsigned>(__builtin_ctz(__x));
+#else
+  static_assert(sizeof(unsigned) == sizeof(unsigned long), "");
+  static_assert(sizeof(unsigned long) == 4, "");
+  unsigned long where;
+  // _BitScanForward searches __x from LSB to MSB for the first set bit and
+  // returns zero if no set bit is found; `where` receives the bit index.
+  if (_BitScanForward(&where, __x))
+    return where;
+  return 32;
+#endif
 }
 
 inline _LIBCPP_INLINE_VISIBILITY
-unsigned long
-__ctz(unsigned long __x)
-{
+unsigned long __ctz(unsigned long __x) {
+#ifndef _LIBCPP_COMPILER_MSVC
     return static_cast<unsigned long>(__builtin_ctzl(__x));
+#else
+    static_assert(sizeof(unsigned long) == sizeof(unsigned), "");
+    return __ctz(static_cast<unsigned>(__x));
+#endif
 }
 
 inline _LIBCPP_INLINE_VISIBILITY
-unsigned long long
-__ctz(unsigned long long __x)
-{
+unsigned long long __ctz(unsigned long long __x) {
+#ifndef _LIBCPP_COMPILER_MSVC
     return static_cast<unsigned long long>(__builtin_ctzll(__x));
+#else
+    unsigned long where;
+// _BitScanForward[64] searches from LSB to MSB for the first set bit and
+// returns zero if no set bit is found.
+#if defined(_LIBCPP_HAS_BITSCAN64)
+  // 64-bit scan available: one _BitScanForward64 call covers all of __x.
+  if (_BitScanForward64(&where, __x))
+    return static_cast<unsigned long long>(where);
+#else
+  // Win32 doesn't have _BitScanForward64 so emulate it with two 32 bit calls.
+  // Scan the Low Word.
+  if (_BitScanForward(&where, static_cast<unsigned long>(__x)))
+    return where;
+  // Scan the High Word.
+  if (_BitScanForward(&where, static_cast<unsigned long>(__x >> 32)))
+    return where + 32; // Create a bit offset from the LSB.
+#endif
+  return 64;
+#endif // _LIBCPP_COMPILER_MSVC
 }
 
 // Precondition:  __x != 0
 inline _LIBCPP_INLINE_VISIBILITY
-unsigned
-__clz(unsigned __x)
-{
+unsigned __clz(unsigned __x) {
+#ifndef _LIBCPP_COMPILER_MSVC
     return static_cast<unsigned>(__builtin_clz(__x));
+#else
+  static_assert(sizeof(unsigned) == sizeof(unsigned long), "");
+  static_assert(sizeof(unsigned long) == 4, "");
+  unsigned long where;
+  // _BitScanReverse searches __x from MSB to LSB for the first set bit and
+  // returns zero if no set bit is found; `where` receives the bit index.
+  if (_BitScanReverse(&where, __x))
+    return 31 - where;
+  return 32; // Undefined Behavior.
+#endif
 }
 
 inline _LIBCPP_INLINE_VISIBILITY
-unsigned long
-__clz(unsigned long __x)
-{
+unsigned long __clz(unsigned long __x) {
+#ifndef _LIBCPP_COMPILER_MSVC
     return static_cast<unsigned long>(__builtin_clzl (__x));
+#else
+    static_assert(sizeof(unsigned) == sizeof(unsigned long), "");
+    return __clz(static_cast<unsigned>(__x));
+#endif
 }
 
 inline _LIBCPP_INLINE_VISIBILITY
-unsigned long long
-__clz(unsigned long long __x)
-{
+unsigned long long __clz(unsigned long long __x) {
+#ifndef _LIBCPP_COMPILER_MSVC
     return static_cast<unsigned long long>(__builtin_clzll(__x));
+#else
+  unsigned long where;
+// _BitScanReverse[64] scans from MSB to LSB for the first set bit and
+// returns 0 if no set bit is found.
+#if defined(_LIBCPP_HAS_BITSCAN64)
+  if (_BitScanReverse64(&where, __x))
+    return static_cast<unsigned long long>(63 - where);
+#else
+  // Scan the high 32 bits.
+  if (_BitScanReverse(&where, static_cast<unsigned long>(__x >> 32)))
+    return 63 - (where + 32); // Create a bit offset from the MSB.
+  // Scan the low 32 bits.
+  if (_BitScanReverse(&where, static_cast<unsigned long>(__x)))
+    return 63 - where;
+#endif
+  return 64; // Undefined Behavior.
+#endif // _LIBCPP_COMPILER_MSVC
 }
 
-inline _LIBCPP_INLINE_VISIBILITY int __pop_count(unsigned           __x) {return __builtin_popcount  (__x);}
-inline _LIBCPP_INLINE_VISIBILITY int __pop_count(unsigned      long __x) {return __builtin_popcountl (__x);}
-inline _LIBCPP_INLINE_VISIBILITY int __pop_count(unsigned long long __x) {return __builtin_popcountll(__x);}
+inline _LIBCPP_INLINE_VISIBILITY int __pop_count(unsigned __x) {
+#ifndef _LIBCPP_COMPILER_MSVC
+  return __builtin_popcount  (__x);
+#else
+  static_assert(sizeof(unsigned) == 4, "");
+  return __popcnt(__x);
+#endif
+}
+
+inline _LIBCPP_INLINE_VISIBILITY int __pop_count(unsigned long __x) {
+#ifndef _LIBCPP_COMPILER_MSVC
+  return __builtin_popcountl (__x);
+#else
+  static_assert(sizeof(unsigned long) == 4, "");
+  return __popcnt(__x);
+#endif
+}
+
+inline _LIBCPP_INLINE_VISIBILITY int __pop_count(unsigned long long __x) {
+#ifndef _LIBCPP_COMPILER_MSVC
+  return __builtin_popcountll(__x);
+#else
+  static_assert(sizeof(unsigned long long) == 8, "");
+  return __popcnt64(__x);
+#endif
+}
 
 // all_of
 
diff --git a/include/ctype.h b/include/ctype.h
index 22d6c49be9e1..e97ff3c48876 100644
--- a/include/ctype.h
+++ b/include/ctype.h
@@ -40,15 +40,6 @@ int toupper(int c);
 
 #ifdef __cplusplus
 
-#if defined(_LIBCPP_MSVCRT)
-// We support including .h headers inside 'extern "C"' contexts, so switch
-// back to C++ linkage before including these C++ headers.
-extern "C++" {
-  #include "support/win32/support.h"
-  #include "support/win32/locale_win32.h"
-}
-#endif // _LIBCPP_MSVCRT
-
 #undef isalnum
 #undef isalpha
 #undef isblank
diff --git a/include/experimental/numeric b/include/experimental/numeric
index d1209dbec15b..6488a68eca6c 100644
--- a/include/experimental/numeric
+++ b/include/experimental/numeric
@@ -66,11 +66,11 @@ struct __abs<_Result, _Source, false> {
 
 
 template<class _Tp>
-_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
-_Tp __gcd(_Tp __m, _Tp __n)
+_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN
+inline _Tp __gcd(_Tp __m, _Tp __n)
 {
     static_assert((!is_signed<_Tp>::value), "" );
-    return __n == 0 ? __m : __gcd<_Tp>(__n, __m % __n);
+    return __n == 0 ? __m : _VSTD_LFTS_V2::__gcd<_Tp>(__n, __m % __n);
 }
 
 
@@ -84,8 +84,9 @@ gcd(_Tp __m, _Up __n)
     static_assert((!is_same<typename remove_cv<_Up>::type, bool>::value), "Second argument to gcd cannot be bool" );
     using _Rp = common_type_t<_Tp,_Up>;
     using _Wp = make_unsigned_t<_Rp>;
-    return static_cast<_Rp>(__gcd(static_cast<_Wp>(__abs<_Rp, _Tp>()(__m)),
-                                  static_cast<_Wp>(__abs<_Rp, _Up>()(__n))));
+    return static_cast<_Rp>(_VSTD_LFTS_V2::__gcd(
+      static_cast<_Wp>(__abs<_Rp, _Tp>()(__m)),
+      static_cast<_Wp>(__abs<_Rp, _Up>()(__n))));
 }
 
 template<class _Tp, class _Up>
@@ -100,7 +101,7 @@ lcm(_Tp __m, _Up __n)
         return 0;
 
     using _Rp = common_type_t<_Tp,_Up>;
-    _Rp __val1 = __abs<_Rp, _Tp>()(__m) / gcd(__m, __n);
+    _Rp __val1 = __abs<_Rp, _Tp>()(__m) / _VSTD_LFTS_V2::gcd(__m, __n);
     _Rp __val2 = __abs<_Rp, _Up>()(__n);
     _LIBCPP_ASSERT((numeric_limits<_Rp>::max() / __val1 > __val2), "Overflow in lcm");
     return __val1 * __val2;
diff --git a/include/ext/hash_map b/include/ext/hash_map
index 66f2b11c0594..998e8f65994e 100644
--- a/include/ext/hash_map
+++ b/include/ext/hash_map
@@ -207,7 +207,7 @@ template <class Key, class T, class Hash, class Pred, class Alloc>
 #include <ext/__hash>
 
 #if __DEPRECATED
-#if defined(_LIBCPP_MSVC)
+#if defined(_LIBCPP_WARNING)
     _LIBCPP_WARNING("Use of the header <ext/hash_map> is deprecated.  Migrate to <unordered_map>")
 #else
 #   warning Use of the header <ext/hash_map> is deprecated.  Migrate to <unordered_map>
diff --git a/include/ext/hash_set b/include/ext/hash_set
index 916ed6910d7d..38f81ed3b5c8 100644
--- a/include/ext/hash_set
+++ b/include/ext/hash_set
@@ -199,7 +199,7 @@ template <class Value, class Hash, class Pred, class Alloc>
 #include <ext/__hash>
 
 #if __DEPRECATED
-#if defined(_LIBCPP_MSVC)
+#if defined(_LIBCPP_WARNING)
     _LIBCPP_WARNING("Use of the header <ext/hash_set> is deprecated.  Migrate to <unordered_set>")
 #else
 #   warning Use of the header <ext/hash_set> is deprecated.  Migrate to <unordered_set>
diff --git a/include/limits b/include/limits
index 609c4d4ed95a..4755c57cf903 100644
--- a/include/limits
+++ b/include/limits
@@ -111,8 +111,8 @@ template<> class numeric_limits<cv long double>;
 
 #include <__undef_min_max>
 
-#if defined(_LIBCPP_MSVCRT)
-#include "support/win32/limits_win32.h"
+#if defined(_LIBCPP_COMPILER_MSVC)
+#include "support/win32/limits_msvc_win32.h"
 #endif // _LIBCPP_MSVCRT
 
 #if defined(__IBMCPP__)
diff --git a/include/locale b/include/locale
index ad1c1f0083ec..d29a2dc70a5b 100644
--- a/include/locale
+++ b/include/locale
@@ -233,9 +233,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 #define __cloc_defined
 #endif
 
-typedef _VSTD::remove_pointer<locale_t>::type __locale_struct;
-typedef _VSTD::unique_ptr<__locale_struct, decltype(&freelocale)> __locale_unique_ptr;
-
 // __scan_keyword
 // Scans [__b, __e) until a match is found in the basic_strings range
 //  [__kb, __ke) or until it can be shown that there is no match in [__kb, __ke).
diff --git a/include/memory b/include/memory
index 41ab01b46f7e..4201c92dd723 100644
--- a/include/memory
+++ b/include/memory
@@ -996,11 +996,11 @@ struct __rebind_pointer {
 
 // allocator_traits
 
-namespace __has_pointer_type_imp
+struct __has_pointer_type_imp
 {
     template <class _Up> static __two __test(...);
     template <class _Up> static char __test(typename _Up::pointer* = 0);
-}
+};
 
 template <class _Tp>
 struct __has_pointer_type
@@ -3924,7 +3924,10 @@ private:
 
     template <class _Yp, class _OrigPtr>
         _LIBCPP_INLINE_VISIBILITY
-        void
+        typename enable_if<is_convertible<_OrigPtr*,
+                                          const enable_shared_from_this<_Yp>*
+        >::value,
+            void>::type
         __enable_weak_this(const enable_shared_from_this<_Yp>* __e,
                            _OrigPtr* __ptr) _NOEXCEPT
         {
@@ -3943,6 +3946,7 @@ private:
     template <class _Up> friend class _LIBCPP_TEMPLATE_VIS weak_ptr;
 };
 
+
 template<class _Tp>
 inline
 _LIBCPP_CONSTEXPR
diff --git a/include/numeric b/include/numeric
index 8f25146938a4..9c98cdbe262e 100644
--- a/include/numeric
+++ b/include/numeric
@@ -222,11 +222,11 @@ struct __abs<_Result, _Source, false> {
 
 
 template<class _Tp>
-_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
+_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN
 _Tp __gcd(_Tp __m, _Tp __n)
 {
     static_assert((!is_signed<_Tp>::value), "");
-    return __n == 0 ? __m : __gcd<_Tp>(__n, __m % __n);
+    return __n == 0 ? __m : _VSTD::__gcd<_Tp>(__n, __m % __n);
 }
 
 
@@ -240,8 +240,9 @@ gcd(_Tp __m, _Up __n)
     static_assert((!is_same<typename remove_cv<_Up>::type, bool>::value), "Second argument to gcd cannot be bool" );
     using _Rp = common_type_t<_Tp,_Up>;
     using _Wp = make_unsigned_t<_Rp>;
-    return static_cast<_Rp>(__gcd(static_cast<_Wp>(__abs<_Rp, _Tp>()(__m)),
-                                  static_cast<_Wp>(__abs<_Rp, _Up>()(__n))));
+    return static_cast<_Rp>(_VSTD::__gcd(
+        static_cast<_Wp>(__abs<_Rp, _Tp>()(__m)),
+        static_cast<_Wp>(__abs<_Rp, _Up>()(__n))));
 }
 
 template<class _Tp, class _Up>
@@ -256,7 +257,7 @@ lcm(_Tp __m, _Up __n)
         return 0;
 
     using _Rp = common_type_t<_Tp,_Up>;
-    _Rp __val1 = __abs<_Rp, _Tp>()(__m) / gcd(__m, __n);
+    _Rp __val1 = __abs<_Rp, _Tp>()(__m) / _VSTD::gcd(__m, __n);
     _Rp __val2 = __abs<_Rp, _Up>()(__n);
     _LIBCPP_ASSERT((numeric_limits<_Rp>::max() / __val1 > __val2), "Overflow in lcm");
     return __val1 * __val2;
diff --git a/include/stdio.h b/include/stdio.h
index 56fb2d83bb28..dc534970f9d8 100644
--- a/include/stdio.h
+++ b/include/stdio.h
@@ -111,8 +111,9 @@ void perror(const char* s);
 
 // snprintf
 #if defined(_LIBCPP_MSVCRT)
-extern "C++" {
-#include "support/win32/support.h"
+extern "C" {
+int vasprintf(char **sptr, const char *__restrict fmt, va_list ap);
+int asprintf(char **sptr, const char *__restrict fmt, ...);
 }
 #endif
 
diff --git a/include/stdlib.h b/include/stdlib.h
index 12fd676a15f5..f11c5e76226e 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -97,10 +97,6 @@ void *aligned_alloc(size_t alignment, size_t size);                       // C11
 
 extern "C++" {
 
-#ifdef _LIBCPP_MSVCRT
-#include "support/win32/locale_win32.h"
-#endif // _LIBCPP_MSVCRT
-
 #undef abs
 #undef div
 #undef labs
diff --git a/include/string_view b/include/string_view
index 5c42b36ca565..e59f099f16e7 100644
--- a/include/string_view
+++ b/include/string_view
@@ -103,7 +103,6 @@ namespace std {
       constexpr const_pointer data() const noexcept;
 
       // 7.7, basic_string_view modifiers
-      constexpr void clear() noexcept;
       constexpr void remove_prefix(size_type n);
       constexpr void remove_suffix(size_type n);
       constexpr void swap(basic_string_view& s) noexcept;
@@ -292,13 +291,6 @@ public:
 	const_pointer data() const _NOEXCEPT { return __data; }
 
 	// [string.view.modifiers], modifiers:
-	_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
-	void clear() _NOEXCEPT
-	{
-		__data = nullptr;
-		__size = 0;
-	}
-
 	_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
 	void remove_prefix(size_type __n) _NOEXCEPT
 	{
diff --git a/include/support/win32/limits_win32.h b/include/support/win32/limits_msvc_win32.h
similarity index 80%
rename from include/support/win32/limits_win32.h
rename to include/support/win32/limits_msvc_win32.h
index 406cd3025bf8..1ab2e0b6dc1b 100644
--- a/include/support/win32/limits_win32.h
+++ b/include/support/win32/limits_msvc_win32.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===--------------------- support/win32/limits_win32.h -------------------===//
+//===------------------ support/win32/limits_msvc_win32.h -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -8,17 +8,21 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef _LIBCPP_SUPPORT_WIN32_LIMITS_WIN32_H
-#define _LIBCPP_SUPPORT_WIN32_LIMITS_WIN32_H
+#ifndef _LIBCPP_SUPPORT_WIN32_LIMITS_MSVC_WIN32_H
+#define _LIBCPP_SUPPORT_WIN32_LIMITS_MSVC_WIN32_H
 
 #if !defined(_LIBCPP_MSVCRT)
 #error "This header complements the Microsoft C Runtime library, and should not be included otherwise."
-#else
+#endif
+#if defined(__clang__)
+#error "This header should only be included when using Microsofts C1XX frontend"
+#endif
 
 #include <limits.h> // CHAR_BIT
 #include <float.h> // limit constants
+#include <math.h> // HUGE_VAL
+#include <ymath.h> // internal MSVC header providing the needed functionality
 
-#if ! defined(__clang__)
 #define __CHAR_BIT__       CHAR_BIT
 
 #define __FLT_MANT_DIG__   FLT_MANT_DIG
@@ -61,19 +65,8 @@
 #define __LDBL_DENORM_MIN__ 3.64519953188247460253e-4951L
 
 // __builtin replacements/workarounds
-#include <math.h> // HUGE_VAL
-#include <ymath.h> // internal MSVC header providing the needed functionality
-#define __builtin_huge_val()     HUGE_VAL
-#define __builtin_huge_valf()    _FInf._Float
 #define __builtin_huge_vall()    _LInf._Long_double
-#define __builtin_nan(__dummy)   _Nan._Double
-#define __builtin_nanf(__dummy)  _FNan._Float
 #define __builtin_nanl(__dummmy) _LNan._Long_double
-#define __builtin_nans(__dummy)  _Snan._Double
-#define __builtin_nansf(__dummy) _FSnan._Float
 #define __builtin_nansl(__dummy) _LSnan._Long_double
-#endif // ! defined(__clang__)
 
-#endif // _LIBCPP_MSVCRT
-
-#endif // _LIBCPP_SUPPORT_WIN32_LIMITS_WIN32_H
+#endif // _LIBCPP_SUPPORT_WIN32_LIMITS_MSVC_WIN32_H
diff --git a/include/support/win32/locale_mgmt_win32.h b/include/support/win32/locale_mgmt_win32.h
deleted file mode 100644
index b3316d62596f..000000000000
--- a/include/support/win32/locale_mgmt_win32.h
+++ /dev/null
@@ -1,33 +0,0 @@
-// -*- C++ -*-
-//===----------------- support/win32/locale_mgmt_win32.h ------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _LIBCPP_SUPPORT_WIN32_LOCALE_MGMT_WIN32_H
-#define _LIBCPP_SUPPORT_WIN32_LOCALE_MGMT_WIN32_H
-
-#include <xlocinfo.h> // _locale_t
-#define locale_t _locale_t
-#define LC_COLLATE_MASK _M_COLLATE
-#define LC_CTYPE_MASK _M_CTYPE
-#define LC_MONETARY_MASK _M_MONETARY
-#define LC_NUMERIC_MASK _M_NUMERIC
-#define LC_TIME_MASK _M_TIME
-#define LC_MESSAGES_MASK _M_MESSAGES
-#define LC_ALL_MASK (  LC_COLLATE_MASK \
-                     | LC_CTYPE_MASK \
-                     | LC_MESSAGES_MASK \
-                     | LC_MONETARY_MASK \
-                     | LC_NUMERIC_MASK \
-                     | LC_TIME_MASK )
-#define freelocale _free_locale
-// FIXME: base currently unused. Needs manual work to construct the new locale
-locale_t newlocale( int mask, const char * locale, locale_t base );
-locale_t uselocale( locale_t newloc );
-
-#endif // _LIBCPP_SUPPORT_WIN32_LOCALE_MGMT_WIN32_H
diff --git a/include/support/win32/locale_win32.h b/include/support/win32/locale_win32.h
index bc717d97990a..7a6c44ca9003 100644
--- a/include/support/win32/locale_win32.h
+++ b/include/support/win32/locale_win32.h
@@ -12,9 +12,30 @@
 #define _LIBCPP_SUPPORT_WIN32_LOCALE_WIN32_H
 
 #include <__config>
-#include "support/win32/support.h"
-#include "support/win32/locale_mgmt_win32.h"
 #include <stdio.h>
+#include <xlocinfo.h> // _locale_t
+
+#define LC_COLLATE_MASK _M_COLLATE
+#define LC_CTYPE_MASK _M_CTYPE
+#define LC_MONETARY_MASK _M_MONETARY
+#define LC_NUMERIC_MASK _M_NUMERIC
+#define LC_TIME_MASK _M_TIME
+#define LC_MESSAGES_MASK _M_MESSAGES
+#define LC_ALL_MASK (  LC_COLLATE_MASK \
+                     | LC_CTYPE_MASK \
+                     | LC_MESSAGES_MASK \
+                     | LC_MONETARY_MASK \
+                     | LC_NUMERIC_MASK \
+                     | LC_TIME_MASK )
+
+#define locale_t _locale_t
+
+// Locale management functions
+#define freelocale _free_locale
+// FIXME: base currently unused. Needs manual work to construct the new locale
+locale_t newlocale( int mask, const char * locale, locale_t base );
+locale_t uselocale( locale_t newloc );
+
 
 lconv *localeconv_l( locale_t loc );
 size_t mbrlen_l( const char *__restrict s, size_t n,
@@ -88,7 +109,6 @@ _LIBCPP_FUNC_VIS int snprintf_l(char *ret, size_t n, locale_t loc, const char *f
 _LIBCPP_FUNC_VIS int asprintf_l( char **ret, locale_t loc, const char *format, ... );
 _LIBCPP_FUNC_VIS int vasprintf_l( char **ret, locale_t loc, const char *format, va_list ap );
 
-
 // not-so-pressing FIXME: use locale to determine blank characters
 inline int isblank_l( int c, locale_t /*loc*/ )
 {
@@ -99,10 +119,4 @@ inline int iswblank_l( wint_t c, locale_t /*loc*/ )
     return ( c == L' ' || c == L'\t' );
 }
 
-#if defined(_LIBCPP_MSVCRT)
-inline int isblank( int c, locale_t /*loc*/ )
-{ return ( c == ' ' || c == '\t' ); }
-inline int iswblank( wint_t c, locale_t /*loc*/ )
-{ return ( c == L' ' || c == L'\t' ); }
-#endif // _LIBCPP_MSVCRT
 #endif // _LIBCPP_SUPPORT_WIN32_LOCALE_WIN32_H
diff --git a/include/support/win32/support.h b/include/support/win32/support.h
deleted file mode 100644
index e48b08ddad03..000000000000
--- a/include/support/win32/support.h
+++ /dev/null
@@ -1,177 +0,0 @@
-// -*- C++ -*-
-//===----------------------- support/win32/support.h ----------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _LIBCPP_SUPPORT_WIN32_SUPPORT_H
-#define _LIBCPP_SUPPORT_WIN32_SUPPORT_H
-
-// Functions and constants used in libc++ that
-// are missing from the Windows C library.
-
-#include <wchar.h> // mbstate_t
-#include <cstdarg> // va_ macros
-// "builtins" not implemented here for Clang or GCC as they provide
-// implementations. Assuming required for elsewhere else, certainly MSVC.
-#if defined(_LIBCPP_COMPILER_MSVC)
-#include <intrin.h>
-#endif
-#define swprintf _snwprintf
-#define vswprintf _vsnwprintf
-
-#ifndef NOMINMAX
-#define NOMINMAX
-#endif
-
-// The mingw headers already define these as static.
-#ifndef __MINGW32__
-extern "C" {
-
-int vasprintf(char **sptr, const char *__restrict fmt, va_list ap);
-int asprintf(char **sptr, const char *__restrict fmt, ...);
-size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
-                  size_t nmc, size_t len, mbstate_t *__restrict ps);
-size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
-                  size_t nwc, size_t len, mbstate_t *__restrict ps);
-}
-#endif // __MINGW32__
-
-#if defined(_LIBCPP_COMPILER_MSVC)
-
-// Bit builtin's make these assumptions when calling _BitScanForward/Reverse
-// etc. These assumptions are expected to be true for Win32/Win64 which this
-// file supports.
-static_assert(sizeof(unsigned long long) == 8, "");
-static_assert(sizeof(unsigned long) == 4, "");
-static_assert(sizeof(unsigned int) == 4, "");
-
-_LIBCPP_ALWAYS_INLINE int __builtin_popcount(unsigned int x)
-{
-  // Binary: 0101...
-  static const unsigned int m1 = 0x55555555;
-  // Binary: 00110011..
-  static const unsigned int m2 = 0x33333333;
-  // Binary:  4 zeros,  4 ones ...
-  static const unsigned int m4 = 0x0f0f0f0f;
-  // The sum of 256 to the power of 0,1,2,3...
-  static const unsigned int h01 = 0x01010101;
-  // Put count of each 2 bits into those 2 bits.
-  x -= (x >> 1) & m1;
-  // Put count of each 4 bits into those 4 bits.
-  x = (x & m2) + ((x >> 2) & m2);
-  // Put count of each 8 bits into those 8 bits.
-  x = (x + (x >> 4)) & m4;
-  // Returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24).
-  return (x * h01) >> 24;
-}
-
-_LIBCPP_ALWAYS_INLINE int __builtin_popcountl(unsigned long x)
-{
-  return __builtin_popcount(static_cast<int>(x));
-}
-
-_LIBCPP_ALWAYS_INLINE int __builtin_popcountll(unsigned long long x)
-{
-  // Binary: 0101...
-  static const unsigned long long m1 = 0x5555555555555555;
-  // Binary: 00110011..
-  static const unsigned long long m2 = 0x3333333333333333;
-  // Binary:  4 zeros,  4 ones ...
-  static const unsigned long long m4 = 0x0f0f0f0f0f0f0f0f;
-  // The sum of 256 to the power of 0,1,2,3...
-  static const unsigned long long h01 = 0x0101010101010101;
-  // Put count of each 2 bits into those 2 bits.
-  x -= (x >> 1) & m1;
-  // Put count of each 4 bits into those 4 bits.
-  x = (x & m2) + ((x >> 2) & m2);
-  // Put count of each 8 bits into those 8 bits.
-  x = (x + (x >> 4)) & m4;
-  // Returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
-  return static_cast<int>((x * h01) >> 56);
-}
-
-// Returns the number of trailing 0-bits in x, starting at the least significant
-// bit position. If x is 0, the result is undefined.
-_LIBCPP_ALWAYS_INLINE int __builtin_ctzll(unsigned long long mask)
-{
-  unsigned long where;
-// Search from LSB to MSB for first set bit.
-// Returns zero if no set bit is found.
-#if defined(_LIBCPP_HAS_BITSCAN64)
-    (defined(_M_AMD64) || defined(__x86_64__))
-  if (_BitScanForward64(&where, mask))
-    return static_cast<int>(where);
-#else
-  // Win32 doesn't have _BitScanForward64 so emulate it with two 32 bit calls.
-  // Scan the Low Word.
-  if (_BitScanForward(&where, static_cast<unsigned long>(mask)))
-    return static_cast<int>(where);
-  // Scan the High Word.
-  if (_BitScanForward(&where, static_cast<unsigned long>(mask >> 32)))
-    return static_cast<int>(where + 32); // Create a bit offset from the LSB.
-#endif
-  return 64;
-}
-
-_LIBCPP_ALWAYS_INLINE int __builtin_ctzl(unsigned long mask)
-{
-  unsigned long where;
-  // Search from LSB to MSB for first set bit.
-  // Returns zero if no set bit is found.
-  if (_BitScanForward(&where, mask))
-    return static_cast<int>(where);
-  return 32;
-}
-
-_LIBCPP_ALWAYS_INLINE int __builtin_ctz(unsigned int mask)
-{
-  // Win32 and Win64 expectations.
-  static_assert(sizeof(mask) == 4, "");
-  static_assert(sizeof(unsigned long) == 4, "");
-  return __builtin_ctzl(static_cast<unsigned long>(mask));
-}
-
-// Returns the number of leading 0-bits in x, starting at the most significant
-// bit position. If x is 0, the result is undefined.
-_LIBCPP_ALWAYS_INLINE int __builtin_clzll(unsigned long long mask)
-{
-  unsigned long where;
-// BitScanReverse scans from MSB to LSB for first set bit.
-// Returns 0 if no set bit is found.
-#if defined(_LIBCPP_HAS_BITSCAN64)
-  if (_BitScanReverse64(&where, mask))
-    return static_cast<int>(63 - where);
-#else
-  // Scan the high 32 bits.
-  if (_BitScanReverse(&where, static_cast<unsigned long>(mask >> 32)))
-    return static_cast<int>(63 -
-                            (where + 32)); // Create a bit offset from the MSB.
-  // Scan the low 32 bits.
-  if (_BitScanReverse(&where, static_cast<unsigned long>(mask)))
-    return static_cast<int>(63 - where);
-#endif
-  return 64; // Undefined Behavior.
-}
-
-_LIBCPP_ALWAYS_INLINE int __builtin_clzl(unsigned long mask)
-{
-  unsigned long where;
-  // Search from LSB to MSB for first set bit.
-  // Returns zero if no set bit is found.
-  if (_BitScanReverse(&where, mask))
-    return static_cast<int>(31 - where);
-  return 32; // Undefined Behavior.
-}
-
-_LIBCPP_ALWAYS_INLINE int __builtin_clz(unsigned int x)
-{
-  return __builtin_clzl(x);
-}
-#endif // _LIBCPP_MSVC
-
-#endif // _LIBCPP_SUPPORT_WIN32_SUPPORT_H
diff --git a/include/variant b/include/variant
index 88f7b240d029..ba15ed8c4a14 100644
--- a/include/variant
+++ b/include/variant
@@ -425,30 +425,21 @@ struct __base {
     constexpr auto __fmatrix =
         __make_fmatrix<_Visitor&&,
                        decltype(_VSTD::forward<_Vs>(__vs).__as_base())...>();
-    const size_t __indices[] = {__vs.index()...};
-    return __at(__fmatrix, __indices)(_VSTD::forward<_Visitor>(__visitor),
-                                      _VSTD::forward<_Vs>(__vs).__as_base()...);
+    return __at(__fmatrix, __vs.index()...)(
+        _VSTD::forward<_Visitor>(__visitor),
+        _VSTD::forward<_Vs>(__vs).__as_base()...);
   }
 
 private:
   template <class _Tp>
   inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr const _Tp& __at_impl(const _Tp& __elem, const size_t*) {
-    return __elem;
-  }
+  static constexpr const _Tp& __at(const _Tp& __elem) { return __elem; }
 
-  template <class _Tp, size_t _Np>
-  inline _LIBCPP_INLINE_VISIBILITY
-  static constexpr auto&& __at_impl(const array<_Tp, _Np>& __elems,
-                                    const size_t* __index) {
-    return __at_impl(__elems[*__index], __index + 1);
-  }
-
-  template <class _Tp, size_t _Np, size_t _Ip>
+  template <class _Tp, size_t _Np, typename... _Indices>
   inline _LIBCPP_INLINE_VISIBILITY
   static constexpr auto&& __at(const array<_Tp, _Np>& __elems,
-                               const size_t (&__indices)[_Ip]) {
-    return __at_impl(__elems, begin(__indices));
+                               size_t __index, _Indices... __indices) {
+    return __at(__elems[__index], __indices...);
   }
 
   template <class _Fp, class... _Fs>
@@ -1140,7 +1131,7 @@ public:
       : __impl(in_place_index<_Ip>, _VSTD::forward<_Arg>(__arg)) {}
 
   template <size_t _Ip, class... _Args,
-            enable_if_t<(_Ip < sizeof...(_Types)), int> = 0,
+            class = enable_if_t<(_Ip < sizeof...(_Types)), int>,
             class _Tp = variant_alternative_t<_Ip, variant<_Types...>>,
             enable_if_t<is_constructible_v<_Tp, _Args...>, int> = 0>
   inline _LIBCPP_INLINE_VISIBILITY
diff --git a/include/wchar.h b/include/wchar.h
index c0c6ef754fbe..25a318faff25 100644
--- a/include/wchar.h
+++ b/include/wchar.h
@@ -166,9 +166,12 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_PREFERRED_OVERLOAD
 }
 #endif
 
-#if defined(__cplusplus) && (defined(_LIBCPP_MSVCRT) || defined(__MINGW32__))
-extern "C++" {
-#include <support/win32/support.h> // pull in *swprintf defines
+#if defined(__cplusplus) && defined(_LIBCPP_MSVCRT)
+extern "C" {
+size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
+                  size_t nmc, size_t len, mbstate_t *__restrict ps);
+size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
+                  size_t nwc, size_t len, mbstate_t *__restrict ps);
-}  // extern "C++"
+}  // extern "C"
 #endif  // __cplusplus && _LIBCPP_MSVCRT
 
diff --git a/src/include/atomic_support.h b/src/include/atomic_support.h
index 378541b23256..08847e630705 100644
--- a/src/include/atomic_support.h
+++ b/src/include/atomic_support.h
@@ -29,7 +29,7 @@
 #endif
 
 #if !defined(_LIBCPP_HAS_ATOMIC_BUILTINS) && !defined(_LIBCPP_HAS_NO_THREADS)
-# if defined(_LIBCPP_MSVC)
+# if defined(_LIBCPP_WARNING)
     _LIBCPP_WARNING("Building libc++ without __atomic builtins is unsupported")
 # else
 #   warning Building libc++ without __atomic builtins is unsupported
diff --git a/src/locale.cpp b/src/locale.cpp
index 1ed9b41fd4ad..4163c2c0af65 100644
--- a/src/locale.cpp
+++ b/src/locale.cpp
@@ -45,6 +45,24 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+struct __libcpp_unique_locale {
+  __libcpp_unique_locale(const char* nm) : __loc_(newlocale(LC_ALL_MASK, nm, 0)) {}
+
+  ~__libcpp_unique_locale() {
+    if (__loc_)
+      freelocale(__loc_);
+  }
+
+  explicit operator bool() const { return __loc_; }
+
+  locale_t& get() { return __loc_; }
+
+  locale_t __loc_;
+private:
+  __libcpp_unique_locale(__libcpp_unique_locale const&);
+  __libcpp_unique_locale& operator=(__libcpp_unique_locale const&);
+};
+
 #ifdef __cloc_defined
 locale_t __cloc() {
   // In theory this could create a race condition. In practice
@@ -4185,7 +4203,7 @@ __widen_from_utf8<32>::~__widen_from_utf8()
 
 static bool checked_string_to_wchar_convert(wchar_t& dest,
                                             const char* ptr,
-                                            __locale_struct* loc) {
+                                            locale_t loc) {
   if (*ptr == '\0')
     return false;
   mbstate_t mb = {};
@@ -4200,7 +4218,7 @@ static bool checked_string_to_wchar_convert(wchar_t& dest,
 
 static bool checked_string_to_char_convert(char& dest,
                                            const char* ptr,
-                                           __locale_struct* __loc) {
+                                           locale_t __loc) {
   if (*ptr == '\0')
     return false;
   if (!ptr[1]) {
@@ -4295,8 +4313,8 @@ numpunct_byname<char>::__init(const char* nm)
 {
     if (strcmp(nm, "C") != 0)
     {
-        __locale_unique_ptr loc(newlocale(LC_ALL_MASK, nm, 0), freelocale);
-        if (loc == nullptr)
+        __libcpp_unique_locale loc(nm);
+        if (!loc)
             __throw_runtime_error("numpunct_byname<char>::numpunct_byname"
                                 " failed to construct for " + string(nm));
 
@@ -4333,8 +4351,8 @@ numpunct_byname<wchar_t>::__init(const char* nm)
 {
     if (strcmp(nm, "C") != 0)
     {
-        __locale_unique_ptr loc(newlocale(LC_ALL_MASK, nm, 0), freelocale);
-        if (loc == nullptr)
+        __libcpp_unique_locale loc(nm);
+        if (!loc)
             __throw_runtime_error("numpunct_byname<wchar_t>::numpunct_byname"
                                 " failed to construct for " + string(nm));
 
@@ -5820,8 +5838,8 @@ void
 moneypunct_byname<char, false>::init(const char* nm)
 {
     typedef moneypunct<char, false> base;
-    __locale_unique_ptr loc(newlocale(LC_ALL_MASK, nm, 0), freelocale);
-    if (loc == nullptr)
+    __libcpp_unique_locale loc(nm);
+    if (!loc)
         __throw_runtime_error("moneypunct_byname"
                             " failed to construct for " + string(nm));
 
@@ -5864,8 +5882,8 @@ void
 moneypunct_byname<char, true>::init(const char* nm)
 {
     typedef moneypunct<char, true> base;
-    __locale_unique_ptr loc(newlocale(LC_ALL_MASK, nm, 0), freelocale);
-    if (loc == nullptr)
+    __libcpp_unique_locale loc(nm);
+    if (!loc)
         __throw_runtime_error("moneypunct_byname"
                             " failed to construct for " + string(nm));
 
@@ -5924,8 +5942,8 @@ void
 moneypunct_byname<wchar_t, false>::init(const char* nm)
 {
     typedef moneypunct<wchar_t, false> base;
-    __locale_unique_ptr loc(newlocale(LC_ALL_MASK, nm, 0), freelocale);
-    if (loc == nullptr)
+    __libcpp_unique_locale loc(nm);
+    if (!loc)
         __throw_runtime_error("moneypunct_byname"
                             " failed to construct for " + string(nm));
     lconv* lc = __libcpp_localeconv_l(loc.get());
@@ -5989,8 +6007,8 @@ void
 moneypunct_byname<wchar_t, true>::init(const char* nm)
 {
     typedef moneypunct<wchar_t, true> base;
-    __locale_unique_ptr loc(newlocale(LC_ALL_MASK, nm, 0), freelocale);
-    if (loc == nullptr)
+    __libcpp_unique_locale loc(nm);
+    if (!loc)
         __throw_runtime_error("moneypunct_byname"
                             " failed to construct for " + string(nm));
 
diff --git a/src/string.cpp b/src/string.cpp
index cd644330b3a7..d7ebdd3e5c9a 100644
--- a/src/string.cpp
+++ b/src/string.cpp
@@ -13,9 +13,6 @@
 #include "cerrno"
 #include "limits"
 #include "stdexcept"
-#ifdef _LIBCPP_MSVCRT
-#include "support/win32/support.h"
-#endif // _LIBCPP_MSVCRT
 #include <stdio.h>
 
 _LIBCPP_BEGIN_NAMESPACE_STD
@@ -430,7 +427,7 @@ get_swprintf()
 #ifndef _LIBCPP_MSVCRT
     return swprintf;
 #else
-    return static_cast<int (__cdecl*)(wchar_t* __restrict, size_t, const wchar_t*__restrict, ...)>(swprintf);
+    return static_cast<int (__cdecl*)(wchar_t* __restrict, size_t, const wchar_t*__restrict, ...)>(_snwprintf);
 #endif
 }
 
diff --git a/src/support/runtime/exception_pointer_msvc.ipp b/src/support/runtime/exception_pointer_msvc.ipp
index a8cd0e8d304d..eab5d30a9487 100644
--- a/src/support/runtime/exception_pointer_msvc.ipp
+++ b/src/support/runtime/exception_pointer_msvc.ipp
@@ -10,6 +10,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <yvals.h> // for _CRTIMP2_PURE
 
 _CRTIMP2_PURE void __CLRCALL_PURE_OR_CDECL __ExceptionPtrCreate(_Out_ void*);
 _CRTIMP2_PURE void __CLRCALL_PURE_OR_CDECL __ExceptionPtrDestroy(_Inout_ void*);
diff --git a/src/support/win32/locale_win32.cpp b/src/support/win32/locale_win32.cpp
index acbf79ac197c..28cb4491781f 100644
--- a/src/support/win32/locale_win32.cpp
+++ b/src/support/win32/locale_win32.cpp
@@ -13,14 +13,14 @@
 #include <memory>
 #include <type_traits>
 
-typedef _VSTD::remove_pointer<locale_t>::type __locale_struct;
-typedef _VSTD::unique_ptr<__locale_struct, decltype(&uselocale)> __locale_raii;
+using std::__libcpp_locale_guard;
 
 // FIXME: base currently unused. Needs manual work to construct the new locale
 locale_t newlocale( int mask, const char * locale, locale_t /*base*/ )
 {
     return _create_locale( mask, locale );
 }
+
 locale_t uselocale( locale_t newloc )
 {
     locale_t old_locale = _get_current_locale();
@@ -36,59 +36,59 @@ locale_t uselocale( locale_t newloc )
 }
 lconv *localeconv_l( locale_t loc )
 {
-    __locale_raii __current( uselocale(loc), uselocale );
+    __libcpp_locale_guard __current(loc);
     return localeconv();
 }
 size_t mbrlen_l( const char *__restrict s, size_t n,
                  mbstate_t *__restrict ps, locale_t loc )
 {
-    __locale_raii __current( uselocale(loc), uselocale );
+    __libcpp_locale_guard __current(loc);
     return mbrlen( s, n, ps );
 }
 size_t mbsrtowcs_l( wchar_t *__restrict dst, const char **__restrict src,
                     size_t len, mbstate_t *__restrict ps, locale_t loc )
 {
-    __locale_raii __current( uselocale(loc), uselocale );
+    __libcpp_locale_guard __current(loc);
     return mbsrtowcs( dst, src, len, ps );
 }
 size_t wcrtomb_l( char *__restrict s, wchar_t wc, mbstate_t *__restrict ps,
                   locale_t loc )
 {
-    __locale_raii __current( uselocale(loc), uselocale );
+    __libcpp_locale_guard __current(loc);
     return wcrtomb( s, wc, ps );
 }
 size_t mbrtowc_l( wchar_t *__restrict pwc, const char *__restrict s,
                   size_t n, mbstate_t *__restrict ps, locale_t loc )
 {
-    __locale_raii __current( uselocale(loc), uselocale );
+    __libcpp_locale_guard __current(loc);
     return mbrtowc( pwc, s, n, ps );
 }
 size_t mbsnrtowcs_l( wchar_t *__restrict dst, const char **__restrict src,
                      size_t nms, size_t len, mbstate_t *__restrict ps, locale_t loc )
 {
-    __locale_raii __current( uselocale(loc), uselocale );
+    __libcpp_locale_guard __current(loc);
     return mbsnrtowcs( dst, src, nms, len, ps );
 }
 size_t wcsnrtombs_l( char *__restrict dst, const wchar_t **__restrict src,
                      size_t nwc, size_t len, mbstate_t *__restrict ps, locale_t loc )
 {
-    __locale_raii __current( uselocale(loc), uselocale );
+    __libcpp_locale_guard __current(loc);
     return wcsnrtombs( dst, src, nwc, len, ps );
 }
 wint_t btowc_l( int c, locale_t loc )
 {
-    __locale_raii __current( uselocale(loc), uselocale );
+    __libcpp_locale_guard __current(loc);
     return btowc( c );
 }
 int wctob_l( wint_t c, locale_t loc )
 {
-    __locale_raii __current( uselocale(loc), uselocale );
+    __libcpp_locale_guard __current(loc);
     return wctob( c );
 }
 
 int snprintf_l(char *ret, size_t n, locale_t loc, const char *format, ...)
 {
-    __locale_raii __current( uselocale(loc), uselocale );
+    __libcpp_locale_guard __current(loc);
     va_list ap;
     va_start( ap, format );
     int result = vsnprintf( ret, n, format, ap );
@@ -106,6 +106,6 @@ int asprintf_l( char **ret, locale_t loc, const char *format, ... )
 }
 int vasprintf_l( char **ret, locale_t loc, const char *format, va_list ap )
 {
-    __locale_raii __current( uselocale(loc), uselocale );
+    __libcpp_locale_guard __current(loc);
     return vasprintf( ret, format, ap );
 }
diff --git a/src/thread.cpp b/src/thread.cpp
index 3f283c385b76..412a8fa0a620 100644
--- a/src/thread.cpp
+++ b/src/thread.cpp
@@ -99,7 +99,7 @@ thread::hardware_concurrency() _NOEXCEPT
 #else  // defined(CTL_HW) && defined(HW_NCPU)
     // TODO: grovel through /proc or check cpuid on x86 and similar
     // instructions on other architectures.
-#   if defined(_LIBCPP_MSVC)
+#   if defined(_LIBCPP_WARNING)
         _LIBCPP_WARNING("hardware_concurrency not yet implemented")
 #   else
 #       warning hardware_concurrency not yet implemented
diff --git a/test/std/input.output/file.streams/c.files/no.global.filesystem.namespace/fopen.fail.cpp b/test/libcxx/input.output/file.streams/c.files/no.global.filesystem.namespace/fopen.fail.cpp
similarity index 89%
rename from test/std/input.output/file.streams/c.files/no.global.filesystem.namespace/fopen.fail.cpp
rename to test/libcxx/input.output/file.streams/c.files/no.global.filesystem.namespace/fopen.fail.cpp
index 4d83296f086a..31a37229bf0c 100644
--- a/test/std/input.output/file.streams/c.files/no.global.filesystem.namespace/fopen.fail.cpp
+++ b/test/libcxx/input.output/file.streams/c.files/no.global.filesystem.namespace/fopen.fail.cpp
@@ -7,6 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+// REQUIRES: libcpp-has-no-global-filesystem-namespace
+
 #include <cstdio>
 
 int main() {
diff --git a/test/std/input.output/file.streams/c.files/no.global.filesystem.namespace/rename.fail.cpp b/test/libcxx/input.output/file.streams/c.files/no.global.filesystem.namespace/rename.fail.cpp
similarity index 89%
rename from test/std/input.output/file.streams/c.files/no.global.filesystem.namespace/rename.fail.cpp
rename to test/libcxx/input.output/file.streams/c.files/no.global.filesystem.namespace/rename.fail.cpp
index deca9bf5b551..248ab4d67210 100644
--- a/test/std/input.output/file.streams/c.files/no.global.filesystem.namespace/rename.fail.cpp
+++ b/test/libcxx/input.output/file.streams/c.files/no.global.filesystem.namespace/rename.fail.cpp
@@ -7,6 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+// REQUIRES: libcpp-has-no-global-filesystem-namespace
+
 #include <cstdio>
 
 int main() {
diff --git a/test/std/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp b/test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp
similarity index 100%
rename from test/std/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp
rename to test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp
diff --git a/test/std/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp b/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp
similarity index 99%
rename from test/std/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp
rename to test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp
index 891b084dd32e..b46c2cdec6cb 100644
--- a/test/std/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp
+++ b/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp
@@ -35,7 +35,6 @@
 
 void sig_action(int) {}
 
-#include <iostream>
 int main()
 {
     int ec;
diff --git a/test/std/utilities/function.objects/refwrap/binary.pass.cpp b/test/libcxx/utilities/function.objects/refwrap/binary.pass.cpp
similarity index 100%
rename from test/std/utilities/function.objects/refwrap/binary.pass.cpp
rename to test/libcxx/utilities/function.objects/refwrap/binary.pass.cpp
diff --git a/test/std/utilities/function.objects/refwrap/unary.pass.cpp b/test/libcxx/utilities/function.objects/refwrap/unary.pass.cpp
similarity index 100%
rename from test/std/utilities/function.objects/refwrap/unary.pass.cpp
rename to test/libcxx/utilities/function.objects/refwrap/unary.pass.cpp
diff --git a/test/libcxx/utilities/template.bitset/includes.pass.cpp b/test/libcxx/utilities/template.bitset/includes.pass.cpp
new file mode 100644
index 000000000000..2e3c2812e441
--- /dev/null
+++ b/test/libcxx/utilities/template.bitset/includes.pass.cpp
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// test that <bitset> includes <cstddef>, <string>, <stdexcept> and <iosfwd>
+
+#include <bitset>
+
+#ifndef _LIBCPP_CSTDDEF
+#error <cstddef> has not been included
+#endif
+
+#ifndef _LIBCPP_STRING
+#error <string> has not been included
+#endif
+
+#ifndef _LIBCPP_STDEXCEPT
+#error <stdexcept> has not been included
+#endif
+
+#ifndef _LIBCPP_IOSFWD
+#error <iosfwd> has not been included
+#endif
+
+int main()
+{
+}
diff --git a/test/std/utilities/tuple/tuple.tuple/empty_member.pass.cpp b/test/libcxx/utilities/tuple/tuple.tuple/empty_member.pass.cpp
similarity index 100%
rename from test/std/utilities/tuple/tuple.tuple/empty_member.pass.cpp
rename to test/libcxx/utilities/tuple/tuple.tuple/empty_member.pass.cpp
diff --git a/test/std/experimental/utilities/meta/meta.type.synop/meta.unary.prop.pass.cpp b/test/std/experimental/utilities/meta/meta.type.synop/meta.unary.prop.pass.cpp
index e267c6833ab9..bfd385a1b8e3 100644
--- a/test/std/experimental/utilities/meta/meta.type.synop/meta.unary.prop.pass.cpp
+++ b/test/std/experimental/utilities/meta/meta.type.synop/meta.unary.prop.pass.cpp
@@ -8,6 +8,12 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++98, c++03, c++11
+
+// GCC returns true for __is_trivially_constructible(void, int)
+// See gcc.gnu.org/PR80682
+// NOTE: This has been fixed in trunk and will be backported soon.
+// XFAIL: gcc-7, gcc-6, gcc-5, gcc-4
+
 // <experimental/type_traits>
 
 #include <experimental/type_traits>
diff --git a/test/std/input.output/file.streams/c.files/no.global.filesystem.namespace/lit.local.cfg b/test/std/input.output/file.streams/c.files/no.global.filesystem.namespace/lit.local.cfg
deleted file mode 100644
index 4ea670935591..000000000000
--- a/test/std/input.output/file.streams/c.files/no.global.filesystem.namespace/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if 'libcpp-has-no-global-filesystem-namespace' not in config.available_features:
-    config.unsupported = True
diff --git a/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp b/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp
index 41b14a6e5bf3..36f815a819be 100644
--- a/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp
+++ b/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp
@@ -15,7 +15,7 @@
 // XFAIL: clang-3, apple-clang-7, apple-clang-8
 
 // None of the current GCC compilers support this.
-// XFAIL: gcc
+// XFAIL: gcc-5, gcc-6
 
 // XFAIL: with_system_cxx_lib=macosx10.12
 // XFAIL: with_system_cxx_lib=macosx10.11
diff --git a/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp b/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp
index 4c9da471294d..69f5ac8c9774 100644
--- a/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp
+++ b/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp
@@ -13,7 +13,7 @@
 // UNSUPPORTED: sanitizer-new-delete
 
 // FIXME change this to XFAIL.
-// UNSUPPORTED: no-aligned-allocation
+// UNSUPPORTED: no-aligned-allocation && !gcc
 
 // XFAIL: with_system_cxx_lib=macosx10.12
 // XFAIL: with_system_cxx_lib=macosx10.11
diff --git a/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp b/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp
index 8e9eff9e206e..36453283caf6 100644
--- a/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp
+++ b/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp
@@ -13,7 +13,7 @@
 // UNSUPPORTED: sanitizer-new-delete
 
 // FIXME turn this into an XFAIL
-// UNSUPPORTED: no-aligned-allocation
+// UNSUPPORTED: no-aligned-allocation && !gcc
 
 // XFAIL: with_system_cxx_lib=macosx10.12
 // XFAIL: with_system_cxx_lib=macosx10.11
diff --git a/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp b/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp
index 46f411d15684..228d176a27e0 100644
--- a/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp
+++ b/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp
@@ -17,7 +17,7 @@
 // XFAIL: with_system_cxx_lib=macosx10.7
 // XFAIL: with_system_cxx_lib=macosx10.8
 
-// XFAIL: no-aligned-allocation
+// XFAIL: no-aligned-allocation && !gcc
 
 // On Windows libc++ doesn't provide its own definitions for new/delete
 // but instead depends on the ones in VCRuntime. However VCRuntime does not
diff --git a/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_replace.pass.cpp b/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_replace.pass.cpp
index 131deb340d2f..15bebcbcbcb8 100644
--- a/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_replace.pass.cpp
+++ b/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_replace.pass.cpp
@@ -10,7 +10,8 @@
 // UNSUPPORTED: c++98, c++03, c++11, c++14
 // UNSUPPORTED: sanitizer-new-delete
 
-// XFAIL: no-aligned-allocation
+// NOTE: GCC doesn't provide the -faligned-allocation flag to test for
+// XFAIL: no-aligned-allocation && !gcc
 
 // test operator new replacement
 
diff --git a/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp b/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp
index 514e5e1044a0..b0e932c96cb0 100644
--- a/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp
+++ b/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp
@@ -14,7 +14,7 @@
 // XFAIL: clang-3, apple-clang-7, apple-clang-8
 
 // None of the current GCC compilers support this.
-// XFAIL: gcc
+// XFAIL: gcc-5, gcc-6
 
 // XFAIL: with_system_cxx_lib=macosx10.12
 // XFAIL: with_system_cxx_lib=macosx10.11
diff --git a/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp b/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp
index 2dcb2dc2c578..22ea5e2fff89 100644
--- a/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp
+++ b/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp
@@ -20,7 +20,7 @@
 // UNSUPPORTED: sanitizer-new-delete
 
 // FIXME turn this into an XFAIL
-// UNSUPPORTED: no-aligned-allocation
+// UNSUPPORTED: no-aligned-allocation && !gcc
 
 // On Windows libc++ doesn't provide its own definitions for new/delete
 // but instead depends on the ones in VCRuntime. However VCRuntime does not
diff --git a/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp b/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp
index e77b41336174..2c5c0f80b839 100644
--- a/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp
+++ b/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp
@@ -20,7 +20,7 @@
 // UNSUPPORTED: sanitizer-new-delete
 
 // FIXME turn this into an XFAIL
-// UNSUPPORTED: no-aligned-allocation
+// UNSUPPORTED: no-aligned-allocation && !gcc
 
 // On Windows libc++ doesn't provide its own definitions for new/delete
 // but instead depends on the ones in VCRuntime. However VCRuntime does not
diff --git a/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp b/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp
index 862ec1ba66a4..b50f49dc6ca7 100644
--- a/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp
+++ b/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp
@@ -17,7 +17,8 @@
 // XFAIL: with_system_cxx_lib=macosx10.7
 // XFAIL: with_system_cxx_lib=macosx10.8
 
-// XFAIL: no-aligned-allocation
+// NOTE: GCC doesn't provide a -faligned-allocation flag to test for
+// XFAIL: no-aligned-allocation && !gcc
 
 // On Windows libc++ doesn't provide its own definitions for new/delete
 // but instead depends on the ones in VCRuntime. However VCRuntime does not
diff --git a/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_replace.pass.cpp b/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_replace.pass.cpp
index df3e4c1560a9..66bb613adfd0 100644
--- a/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_replace.pass.cpp
+++ b/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_replace.pass.cpp
@@ -10,7 +10,8 @@
 // UNSUPPORTED: c++98, c++03, c++11, c++14
 // UNSUPPORTED: sanitizer-new-delete
 
-// XFAIL: no-aligned-allocation
+// NOTE: GCC doesn't provide a -faligned-allocation flag
+// XFAIL: no-aligned-allocation && !gcc
 
 // test operator new replacement
 
diff --git a/test/std/language.support/support.types/byteops/and.assign.pass.cpp b/test/std/language.support/support.types/byteops/and.assign.pass.cpp
index dec241eb0c41..ec1c3805a6cb 100644
--- a/test/std/language.support/support.types/byteops/and.assign.pass.cpp
+++ b/test/std/language.support/support.types/byteops/and.assign.pass.cpp
@@ -11,9 +11,6 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // constexpr byte& operator &=(byte l, byte r) noexcept;
 
@@ -26,9 +23,9 @@ constexpr std::byte test(std::byte b1, std::byte b2) {
 
 int main () {
 	std::byte b;  // not constexpr, just used in noexcept check
-	constexpr std::byte b1{1};
-	constexpr std::byte b8{8};
-	constexpr std::byte b9{9};
+	constexpr std::byte b1{static_cast<std::byte>(1)};
+	constexpr std::byte b8{static_cast<std::byte>(8)};
+	constexpr std::byte b9{static_cast<std::byte>(9)};
 
 	static_assert(noexcept(b &= b), "" );
 
diff --git a/test/std/language.support/support.types/byteops/and.pass.cpp b/test/std/language.support/support.types/byteops/and.pass.cpp
index 22da6e3e0d4c..6c5419cfc2a2 100644
--- a/test/std/language.support/support.types/byteops/and.pass.cpp
+++ b/test/std/language.support/support.types/byteops/and.pass.cpp
@@ -11,16 +11,13 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // constexpr byte operator&(byte l, byte r) noexcept;
 
 int main () {
-	constexpr std::byte b1{1};
-	constexpr std::byte b8{8};
-	constexpr std::byte b9{9};
+	constexpr std::byte b1{static_cast<std::byte>(1)};
+	constexpr std::byte b8{static_cast<std::byte>(8)};
+	constexpr std::byte b9{static_cast<std::byte>(9)};
 
 	static_assert(noexcept(b1 & b8), "" );
 
diff --git a/test/std/language.support/support.types/byteops/enum_direct_init.pass.cpp b/test/std/language.support/support.types/byteops/enum_direct_init.pass.cpp
new file mode 100644
index 000000000000..157626137330
--- /dev/null
+++ b/test/std/language.support/support.types/byteops/enum_direct_init.pass.cpp
@@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstddef>
+#include <test_macros.h>
+
+// UNSUPPORTED: c++98, c++03, c++11, c++14
+// The following compilers don't like "std::byte b1{1}"
+// XFAIL: clang-3.5, clang-3.6, clang-3.7, clang-3.8
+// XFAIL: apple-clang-6, apple-clang-7, apple-clang-8.0
+
+int main () {
+  constexpr std::byte b{42};
+  static_assert(std::to_integer<int>(b) == 42, "");
+}
diff --git a/test/std/language.support/support.types/byteops/lshift.assign.pass.cpp b/test/std/language.support/support.types/byteops/lshift.assign.pass.cpp
index dad692e14a6b..abfeebf2b024 100644
--- a/test/std/language.support/support.types/byteops/lshift.assign.pass.cpp
+++ b/test/std/language.support/support.types/byteops/lshift.assign.pass.cpp
@@ -11,9 +11,6 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // template <class IntegerType>
 //   constexpr byte& operator<<=(byte& b, IntegerType shift) noexcept;
@@ -28,8 +25,8 @@ constexpr std::byte test(std::byte b) {
 
 int main () {
 	std::byte b;  // not constexpr, just used in noexcept check
-	constexpr std::byte b2{2};
-	constexpr std::byte b3{3};
+	constexpr std::byte b2{static_cast<std::byte>(2)};
+	constexpr std::byte b3{static_cast<std::byte>(3)};
 
 	static_assert(noexcept(b <<= 2), "" );
 
diff --git a/test/std/language.support/support.types/byteops/lshift.fail.cpp b/test/std/language.support/support.types/byteops/lshift.fail.cpp
index 6b1a68f83d25..707258c6df30 100644
--- a/test/std/language.support/support.types/byteops/lshift.fail.cpp
+++ b/test/std/language.support/support.types/byteops/lshift.fail.cpp
@@ -11,9 +11,6 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // template <class IntegerType>
 //    constexpr byte operator <<(byte b, IntegerType shift) noexcept;
@@ -21,6 +18,6 @@
 //   is_integral_v<IntegerType> is true.
 
 int main () {
-	constexpr std::byte b1{1};
+	constexpr std::byte b1{static_cast<std::byte>(1)};
 	constexpr std::byte b2 = b1 << 2.0f;
 }
diff --git a/test/std/language.support/support.types/byteops/lshift.pass.cpp b/test/std/language.support/support.types/byteops/lshift.pass.cpp
index 39d659023ad8..15ad796cbe9e 100644
--- a/test/std/language.support/support.types/byteops/lshift.pass.cpp
+++ b/test/std/language.support/support.types/byteops/lshift.pass.cpp
@@ -11,9 +11,6 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // template <class IntegerType>
 //    constexpr byte operator <<(byte b, IntegerType shift) noexcept;
@@ -21,8 +18,8 @@
 //   is_integral_v<IntegerType> is true.
 
 int main () {
-	constexpr std::byte b1{1};
-	constexpr std::byte b3{3};
+	constexpr std::byte b1{static_cast<std::byte>(1)};
+	constexpr std::byte b3{static_cast<std::byte>(3)};
 
 	static_assert(noexcept(b3 << 2), "" );
 
diff --git a/test/std/language.support/support.types/byteops/not.pass.cpp b/test/std/language.support/support.types/byteops/not.pass.cpp
index 734780f194a0..53f1d91faf23 100644
--- a/test/std/language.support/support.types/byteops/not.pass.cpp
+++ b/test/std/language.support/support.types/byteops/not.pass.cpp
@@ -11,16 +11,13 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // constexpr byte operator~(byte b) noexcept;
 
 int main () {
-	constexpr std::byte b1{1};
-	constexpr std::byte b2{2};
-	constexpr std::byte b8{8};
+	constexpr std::byte b1{static_cast<std::byte>(1)};
+	constexpr std::byte b2{static_cast<std::byte>(2)};
+	constexpr std::byte b8{static_cast<std::byte>(8)};
 
 	static_assert(noexcept(~b1), "" );
 
diff --git a/test/std/language.support/support.types/byteops/or.assign.pass.cpp b/test/std/language.support/support.types/byteops/or.assign.pass.cpp
index 75d6ab4d0a9d..bb4a84ec6c76 100644
--- a/test/std/language.support/support.types/byteops/or.assign.pass.cpp
+++ b/test/std/language.support/support.types/byteops/or.assign.pass.cpp
@@ -11,9 +11,6 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // constexpr byte& operator |=(byte l, byte r) noexcept;
 
@@ -26,9 +23,9 @@ constexpr std::byte test(std::byte b1, std::byte b2) {
 
 int main () {
 	std::byte b;  // not constexpr, just used in noexcept check
-	constexpr std::byte b1{1};
-	constexpr std::byte b2{2};
-	constexpr std::byte b8{8};
+	constexpr std::byte b1{static_cast<std::byte>(1)};
+	constexpr std::byte b2{static_cast<std::byte>(2)};
+	constexpr std::byte b8{static_cast<std::byte>(8)};
 
 	static_assert(noexcept(b |= b), "" );
 
diff --git a/test/std/language.support/support.types/byteops/or.pass.cpp b/test/std/language.support/support.types/byteops/or.pass.cpp
index 02c547f1dbb6..e2b734982c09 100644
--- a/test/std/language.support/support.types/byteops/or.pass.cpp
+++ b/test/std/language.support/support.types/byteops/or.pass.cpp
@@ -11,16 +11,13 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // constexpr byte operator|(byte l, byte r) noexcept;
 
 int main () {
-	constexpr std::byte b1{1};
-	constexpr std::byte b2{2};
-	constexpr std::byte b8{8};
+	constexpr std::byte b1{static_cast<std::byte>(1)};
+	constexpr std::byte b2{static_cast<std::byte>(2)};
+	constexpr std::byte b8{static_cast<std::byte>(8)};
 
 	static_assert(noexcept(b1 | b2), "" );
 
diff --git a/test/std/language.support/support.types/byteops/rshift.assign.pass.cpp b/test/std/language.support/support.types/byteops/rshift.assign.pass.cpp
index b7e9a24f20a1..82c611cfdfcb 100644
--- a/test/std/language.support/support.types/byteops/rshift.assign.pass.cpp
+++ b/test/std/language.support/support.types/byteops/rshift.assign.pass.cpp
@@ -11,9 +11,6 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // template <class IntegerType>
 //   constexpr byte& operator>>=(byte& b, IntegerType shift) noexcept;
@@ -28,8 +25,8 @@ constexpr std::byte test(std::byte b) {
 
 int main () {
 	std::byte b;  // not constexpr, just used in noexcept check
-	constexpr std::byte b16{16};
-	constexpr std::byte b192{192};
+	constexpr std::byte b16{static_cast<std::byte>(16)};
+	constexpr std::byte b192{static_cast<std::byte>(192)};
 
 	static_assert(noexcept(b >>= 2), "" );
 
diff --git a/test/std/language.support/support.types/byteops/rshift.fail.cpp b/test/std/language.support/support.types/byteops/rshift.fail.cpp
index a0309539a7a6..8e142dd90a5c 100644
--- a/test/std/language.support/support.types/byteops/rshift.fail.cpp
+++ b/test/std/language.support/support.types/byteops/rshift.fail.cpp
@@ -11,9 +11,6 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // template <class IntegerType>
 //    constexpr byte operator >>(byte b, IntegerType shift) noexcept;
@@ -21,6 +18,6 @@
 //   is_integral_v<IntegerType> is true.
 
 int main () {
-	constexpr std::byte b1{1};
+	constexpr std::byte b1{static_cast<std::byte>(1)};
 	constexpr std::byte b2 = b1 >> 2.0f;
 }
diff --git a/test/std/language.support/support.types/byteops/rshift.pass.cpp b/test/std/language.support/support.types/byteops/rshift.pass.cpp
index 876732545ac8..22d399130bbc 100644
--- a/test/std/language.support/support.types/byteops/rshift.pass.cpp
+++ b/test/std/language.support/support.types/byteops/rshift.pass.cpp
@@ -11,9 +11,6 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // template <class IntegerType>
 //    constexpr byte operator <<(byte b, IntegerType shift) noexcept;
@@ -27,8 +24,8 @@ constexpr std::byte test(std::byte b) {
 
 
 int main () {
-	constexpr std::byte b100{100};
-	constexpr std::byte b115{115};
+	constexpr std::byte b100{static_cast<std::byte>(100)};
+	constexpr std::byte b115{static_cast<std::byte>(115)};
 
 	static_assert(noexcept(b100 << 2), "" );
 
diff --git a/test/std/language.support/support.types/byteops/to_integer.fail.cpp b/test/std/language.support/support.types/byteops/to_integer.fail.cpp
index 426ceb7a67de..fb4928956f55 100644
--- a/test/std/language.support/support.types/byteops/to_integer.fail.cpp
+++ b/test/std/language.support/support.types/byteops/to_integer.fail.cpp
@@ -11,9 +11,6 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // template <class IntegerType>
 //    constexpr IntegerType to_integer(byte b) noexcept;
@@ -21,6 +18,6 @@
 //   is_integral_v<IntegerType> is true.
 
 int main () {
-	constexpr std::byte b1{1};
+	constexpr std::byte b1{static_cast<std::byte>(1)};
 	auto f = std::to_integer<float>(b1);
 }
diff --git a/test/std/language.support/support.types/byteops/to_integer.pass.cpp b/test/std/language.support/support.types/byteops/to_integer.pass.cpp
index 21dff0196a43..4f34363447f6 100644
--- a/test/std/language.support/support.types/byteops/to_integer.pass.cpp
+++ b/test/std/language.support/support.types/byteops/to_integer.pass.cpp
@@ -11,9 +11,6 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // template <class IntegerType>
 //    constexpr IntegerType to_integer(byte b) noexcept;
@@ -21,8 +18,8 @@
 //   is_integral_v<IntegerType> is true.
 
 int main () {
-	constexpr std::byte b1{1};
-	constexpr std::byte b3{3};
+	constexpr std::byte b1{static_cast<std::byte>(1)};
+	constexpr std::byte b3{static_cast<std::byte>(3)};
 
 	static_assert(noexcept(std::to_integer<int>(b1)), "" );
 	static_assert(std::is_same<int, decltype(std::to_integer<int>(b1))>::value, "" );
diff --git a/test/std/language.support/support.types/byteops/xor.assign.pass.cpp b/test/std/language.support/support.types/byteops/xor.assign.pass.cpp
index c9b40177a17c..6a526da3d714 100644
--- a/test/std/language.support/support.types/byteops/xor.assign.pass.cpp
+++ b/test/std/language.support/support.types/byteops/xor.assign.pass.cpp
@@ -11,9 +11,6 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // constexpr byte& operator ^=(byte l, byte r) noexcept;
 
@@ -26,9 +23,9 @@ constexpr std::byte test(std::byte b1, std::byte b2) {
 
 int main () {
 	std::byte b;  // not constexpr, just used in noexcept check
-	constexpr std::byte b1{1};
-	constexpr std::byte b8{8};
-	constexpr std::byte b9{9};
+	constexpr std::byte b1{static_cast<std::byte>(1)};
+	constexpr std::byte b8{static_cast<std::byte>(8)};
+	constexpr std::byte b9{static_cast<std::byte>(9)};
 
 	static_assert(noexcept(b ^= b), "" );
 
diff --git a/test/std/language.support/support.types/byteops/xor.pass.cpp b/test/std/language.support/support.types/byteops/xor.pass.cpp
index 3d0402b30a55..e8c3b9875785 100644
--- a/test/std/language.support/support.types/byteops/xor.pass.cpp
+++ b/test/std/language.support/support.types/byteops/xor.pass.cpp
@@ -11,16 +11,13 @@
 #include <test_macros.h>
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14
-// The following compilers don't like "std::byte b1{1}"
-// UNSUPPORTED: clang-3.5, clang-3.6, clang-3.7, clang-3.8
-// UNSUPPORTED: apple-clang-6, apple-clang-7, apple-clang-8.0
 
 // constexpr byte operator^(byte l, byte r) noexcept;
 
 int main () {
-	constexpr std::byte b1{1};
-	constexpr std::byte b8{8};
-	constexpr std::byte b9{9};
+	constexpr std::byte b1{static_cast<std::byte>(1)};
+	constexpr std::byte b8{static_cast<std::byte>(8)};
+	constexpr std::byte b9{static_cast<std::byte>(9)};
 
 	static_assert(noexcept(b1 ^ b8), "" );
 
diff --git a/test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp b/test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp
index 2eb6978b0e0a..b90c41e59d0d 100644
--- a/test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp
+++ b/test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp
@@ -25,307 +25,329 @@
 #include <codecvt>
 #include <cassert>
 
-int main()
-{
-    {
-        typedef std::codecvt_utf16<wchar_t> C;
-        C c;
-        wchar_t w = 0x40003;
-        char n[4] = {0};
-        const wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+4);
-        assert(n[0] == char(0xD8));
-        assert(n[1] == char(0xC0));
-        assert(n[2] == char(0xDC));
-        assert(n[3] == char(0x03));
+template <class CharT, size_t = sizeof(CharT)>
+struct TestHelper;
+template <class CharT>
+struct TestHelper<CharT, 2> {
+  static void test();
+};
+template <class CharT>
+struct TestHelper<CharT, 4> {
+  static void test();
+};
 
-        w = 0x1005;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0x10));
-        assert(n[1] == char(0x05));
-        assert(n[2] == char(0xDC));
-        assert(n[3] == char(0x03));
-
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0x04));
-        assert(n[1] == char(0x53));
-        assert(n[2] == char(0xDC));
-        assert(n[3] == char(0x03));
-
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0x00));
-        assert(n[1] == char(0x56));
-        assert(n[2] == char(0xDC));
-        assert(n[3] == char(0x03));
-    }
-    {
-        typedef std::codecvt_utf16<wchar_t, 0x1000> C;
-        C c;
-        wchar_t w = 0x40003;
-        char n[4] = {0};
-        const wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == &w);
-        assert(np == n);
-        assert(n[0] == char(0));
-        assert(n[1] == char(0));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
-
-        w = 0x1005;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == &w);
-        assert(np == n);
-        assert(n[0] == char(0));
-        assert(n[1] == char(0));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
-
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0x04));
-        assert(n[1] == char(0x53));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
-
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0x00));
-        assert(n[1] == char(0x56));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
-    }
-    {
-        typedef std::codecvt_utf16<wchar_t, 0x10ffff, std::generate_header> C;
-        C c;
-        wchar_t w = 0x40003;
-        char n[6] = {0};
-        const wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+6, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+6);
-        assert(n[0] == char(0xFE));
-        assert(n[1] == char(0xFF));
-        assert(n[2] == char(0xD8));
-        assert(n[3] == char(0xC0));
-        assert(n[4] == char(0xDC));
-        assert(n[5] == char(0x03));
-
-        w = 0x1005;
-        r = c.out(m, &w, &w+1, wp, n, n+6, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+4);
-        assert(n[0] == char(0xFE));
-        assert(n[1] == char(0xFF));
-        assert(n[2] == char(0x10));
-        assert(n[3] == char(0x05));
-        assert(n[4] == char(0xDC));
-        assert(n[5] == char(0x03));
-
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+6, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+4);
-        assert(n[0] == char(0xFE));
-        assert(n[1] == char(0xFF));
-        assert(n[2] == char(0x04));
-        assert(n[3] == char(0x53));
-        assert(n[4] == char(0xDC));
-        assert(n[5] == char(0x03));
-
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+6, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+4);
-        assert(n[0] == char(0xFE));
-        assert(n[1] == char(0xFF));
-        assert(n[2] == char(0x00));
-        assert(n[3] == char(0x56));
-        assert(n[4] == char(0xDC));
-        assert(n[5] == char(0x03));
-    }
-
-    {
-        typedef std::codecvt_utf16<wchar_t, 0x10FFFF, std::little_endian> C;
-        C c;
-        wchar_t w = 0x40003;
-        char n[4] = {0};
-        const wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+4);
-        assert(n[1] == char(0xD8));
-        assert(n[0] == char(0xC0));
-        assert(n[3] == char(0xDC));
-        assert(n[2] == char(0x03));
-
-        w = 0x1005;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[1] == char(0x10));
-        assert(n[0] == char(0x05));
-        assert(n[3] == char(0xDC));
-        assert(n[2] == char(0x03));
-
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[1] == char(0x04));
-        assert(n[0] == char(0x53));
-        assert(n[3] == char(0xDC));
-        assert(n[2] == char(0x03));
-
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[1] == char(0x00));
-        assert(n[0] == char(0x56));
-        assert(n[3] == char(0xDC));
-        assert(n[2] == char(0x03));
-    }
-    {
-        typedef std::codecvt_utf16<wchar_t, 0x1000, std::little_endian> C;
-        C c;
-        wchar_t w = 0x40003;
-        char n[4] = {0};
-        const wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == &w);
-        assert(np == n);
-        assert(n[1] == char(0));
-        assert(n[0] == char(0));
-        assert(n[3] == char(0));
-        assert(n[2] == char(0));
-
-        w = 0x1005;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == &w);
-        assert(np == n);
-        assert(n[1] == char(0));
-        assert(n[0] == char(0));
-        assert(n[3] == char(0));
-        assert(n[2] == char(0));
-
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[1] == char(0x04));
-        assert(n[0] == char(0x53));
-        assert(n[3] == char(0));
-        assert(n[2] == char(0));
-
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[1] == char(0x00));
-        assert(n[0] == char(0x56));
-        assert(n[3] == char(0));
-        assert(n[2] == char(0));
-    }
-    {
-        typedef std::codecvt_utf16<wchar_t, 0x10ffff, std::codecvt_mode(
-                                                         std::generate_header |
-                                                         std::little_endian)> C;
-        C c;
-        wchar_t w = 0x40003;
-        char n[6] = {0};
-        const wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+6, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+6);
-        assert(n[1] == char(0xFE));
-        assert(n[0] == char(0xFF));
-        assert(n[3] == char(0xD8));
-        assert(n[2] == char(0xC0));
-        assert(n[5] == char(0xDC));
-        assert(n[4] == char(0x03));
-
-        w = 0x1005;
-        r = c.out(m, &w, &w+1, wp, n, n+6, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+4);
-        assert(n[1] == char(0xFE));
-        assert(n[0] == char(0xFF));
-        assert(n[3] == char(0x10));
-        assert(n[2] == char(0x05));
-        assert(n[5] == char(0xDC));
-        assert(n[4] == char(0x03));
-
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+6, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+4);
-        assert(n[1] == char(0xFE));
-        assert(n[0] == char(0xFF));
-        assert(n[3] == char(0x04));
-        assert(n[2] == char(0x53));
-        assert(n[5] == char(0xDC));
-        assert(n[4] == char(0x03));
-
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+6, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+4);
-        assert(n[1] == char(0xFE));
-        assert(n[0] == char(0xFF));
-        assert(n[3] == char(0x00));
-        assert(n[2] == char(0x56));
-        assert(n[5] == char(0xDC));
-        assert(n[4] == char(0x03));
-    }
+template <class CharT>
+void TestHelper<CharT, 2>::test() {
+  // Nothing to do, the conversion is unsupported
+}
+
+template <class CharT>
+void TestHelper<CharT, 4>::test() {
+  {
+    typedef std::codecvt_utf16<CharT> C;
+    C c;
+    CharT w = 0x40003;
+    char n[4] = {0};
+    const CharT* wp = nullptr;
+    std::mbstate_t m;
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 4);
+    assert(n[0] == char(0xD8));
+    assert(n[1] == char(0xC0));
+    assert(n[2] == char(0xDC));
+    assert(n[3] == char(0x03));
+
+    w = 0x1005;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 2);
+    assert(n[0] == char(0x10));
+    assert(n[1] == char(0x05));
+    assert(n[2] == char(0xDC));
+    assert(n[3] == char(0x03));
+
+    w = 0x453;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 2);
+    assert(n[0] == char(0x04));
+    assert(n[1] == char(0x53));
+    assert(n[2] == char(0xDC));
+    assert(n[3] == char(0x03));
+
+    w = 0x56;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 2);
+    assert(n[0] == char(0x00));
+    assert(n[1] == char(0x56));
+    assert(n[2] == char(0xDC));
+    assert(n[3] == char(0x03));
+  }
+  {
+    typedef std::codecvt_utf16<CharT, 0x1000> C;
+    C c;
+    CharT w = 0x40003;
+    char n[4] = {0};
+    const CharT* wp = nullptr;
+    std::mbstate_t m;
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::error);
+    assert(wp == &w);
+    assert(np == n);
+    assert(n[0] == char(0));
+    assert(n[1] == char(0));
+    assert(n[2] == char(0));
+    assert(n[3] == char(0));
+
+    w = 0x1005;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::error);
+    assert(wp == &w);
+    assert(np == n);
+    assert(n[0] == char(0));
+    assert(n[1] == char(0));
+    assert(n[2] == char(0));
+    assert(n[3] == char(0));
+
+    w = 0x453;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 2);
+    assert(n[0] == char(0x04));
+    assert(n[1] == char(0x53));
+    assert(n[2] == char(0));
+    assert(n[3] == char(0));
+
+    w = 0x56;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 2);
+    assert(n[0] == char(0x00));
+    assert(n[1] == char(0x56));
+    assert(n[2] == char(0));
+    assert(n[3] == char(0));
+  }
+  {
+    typedef std::codecvt_utf16<CharT, 0x10ffff, std::generate_header> C;
+    C c;
+    CharT w = 0x40003;
+    char n[6] = {0};
+    const CharT* wp = nullptr;
+    std::mbstate_t m;
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, &w, &w + 1, wp, n, n + 6, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 6);
+    assert(n[0] == char(0xFE));
+    assert(n[1] == char(0xFF));
+    assert(n[2] == char(0xD8));
+    assert(n[3] == char(0xC0));
+    assert(n[4] == char(0xDC));
+    assert(n[5] == char(0x03));
+
+    w = 0x1005;
+    r = c.out(m, &w, &w + 1, wp, n, n + 6, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 4);
+    assert(n[0] == char(0xFE));
+    assert(n[1] == char(0xFF));
+    assert(n[2] == char(0x10));
+    assert(n[3] == char(0x05));
+    assert(n[4] == char(0xDC));
+    assert(n[5] == char(0x03));
+
+    w = 0x453;
+    r = c.out(m, &w, &w + 1, wp, n, n + 6, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 4);
+    assert(n[0] == char(0xFE));
+    assert(n[1] == char(0xFF));
+    assert(n[2] == char(0x04));
+    assert(n[3] == char(0x53));
+    assert(n[4] == char(0xDC));
+    assert(n[5] == char(0x03));
+
+    w = 0x56;
+    r = c.out(m, &w, &w + 1, wp, n, n + 6, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 4);
+    assert(n[0] == char(0xFE));
+    assert(n[1] == char(0xFF));
+    assert(n[2] == char(0x00));
+    assert(n[3] == char(0x56));
+    assert(n[4] == char(0xDC));
+    assert(n[5] == char(0x03));
+  }
+
+  {
+    typedef std::codecvt_utf16<CharT, 0x10FFFF, std::little_endian> C;
+    C c;
+    CharT w = 0x40003;
+    char n[4] = {0};
+    const CharT* wp = nullptr;
+    std::mbstate_t m;
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 4);
+    assert(n[1] == char(0xD8));
+    assert(n[0] == char(0xC0));
+    assert(n[3] == char(0xDC));
+    assert(n[2] == char(0x03));
+
+    w = 0x1005;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 2);
+    assert(n[1] == char(0x10));
+    assert(n[0] == char(0x05));
+    assert(n[3] == char(0xDC));
+    assert(n[2] == char(0x03));
+
+    w = 0x453;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 2);
+    assert(n[1] == char(0x04));
+    assert(n[0] == char(0x53));
+    assert(n[3] == char(0xDC));
+    assert(n[2] == char(0x03));
+
+    w = 0x56;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 2);
+    assert(n[1] == char(0x00));
+    assert(n[0] == char(0x56));
+    assert(n[3] == char(0xDC));
+    assert(n[2] == char(0x03));
+  }
+  {
+    typedef std::codecvt_utf16<CharT, 0x1000, std::little_endian> C;
+    C c;
+    CharT w = 0x40003;
+    char n[4] = {0};
+    const CharT* wp = nullptr;
+    std::mbstate_t m;
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::error);
+    assert(wp == &w);
+    assert(np == n);
+    assert(n[1] == char(0));
+    assert(n[0] == char(0));
+    assert(n[3] == char(0));
+    assert(n[2] == char(0));
+
+    w = 0x1005;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::error);
+    assert(wp == &w);
+    assert(np == n);
+    assert(n[1] == char(0));
+    assert(n[0] == char(0));
+    assert(n[3] == char(0));
+    assert(n[2] == char(0));
+
+    w = 0x453;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 2);
+    assert(n[1] == char(0x04));
+    assert(n[0] == char(0x53));
+    assert(n[3] == char(0));
+    assert(n[2] == char(0));
+
+    w = 0x56;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 2);
+    assert(n[1] == char(0x00));
+    assert(n[0] == char(0x56));
+    assert(n[3] == char(0));
+    assert(n[2] == char(0));
+  }
+  {
+    typedef std::codecvt_utf16<CharT, 0x10ffff,
+                               std::codecvt_mode(std::generate_header |
+                                                 std::little_endian)>
+        C;
+    C c;
+    CharT w = 0x40003;
+    char n[6] = {0};
+    const CharT* wp = nullptr;
+    std::mbstate_t m;
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, &w, &w + 1, wp, n, n + 6, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 6);
+    assert(n[1] == char(0xFE));
+    assert(n[0] == char(0xFF));
+    assert(n[3] == char(0xD8));
+    assert(n[2] == char(0xC0));
+    assert(n[5] == char(0xDC));
+    assert(n[4] == char(0x03));
+
+    w = 0x1005;
+    r = c.out(m, &w, &w + 1, wp, n, n + 6, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 4);
+    assert(n[1] == char(0xFE));
+    assert(n[0] == char(0xFF));
+    assert(n[3] == char(0x10));
+    assert(n[2] == char(0x05));
+    assert(n[5] == char(0xDC));
+    assert(n[4] == char(0x03));
+
+    w = 0x453;
+    r = c.out(m, &w, &w + 1, wp, n, n + 6, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 4);
+    assert(n[1] == char(0xFE));
+    assert(n[0] == char(0xFF));
+    assert(n[3] == char(0x04));
+    assert(n[2] == char(0x53));
+    assert(n[5] == char(0xDC));
+    assert(n[4] == char(0x03));
+
+    w = 0x56;
+    r = c.out(m, &w, &w + 1, wp, n, n + 6, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 4);
+    assert(n[1] == char(0xFE));
+    assert(n[0] == char(0xFF));
+    assert(n[3] == char(0x00));
+    assert(n[2] == char(0x56));
+    assert(n[5] == char(0xDC));
+    assert(n[4] == char(0x03));
+  }
+}
+
+int main() {  // Entry point: runs TestHelper<...>::test() once per character type below.
+  TestHelper<char32_t>::test();
+  TestHelper<wchar_t>::test();
 }
diff --git a/test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp b/test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp
index 02cf7cf958e7..886fc4416bbb 100644
--- a/test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp
+++ b/test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp
@@ -25,432 +25,302 @@
 #include <codecvt>
 #include <cassert>
 
-int main()
-{
-    {
-        typedef std::codecvt_utf8<wchar_t> C;
-        C c;
-        wchar_t w = 0x40003;
-        char n[4] = {0};
-        const wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+4);
-        assert(n[0] == char(0xF1));
-        assert(n[1] == char(0x80));
-        assert(n[2] == char(0x80));
-        assert(n[3] == char(0x83));
+template <class CharT, size_t = sizeof(CharT)>  // second argument defaults to sizeof(CharT), so TestHelper<T> selects a specialization by size
+struct TestHelper;  // primary template: declared only, no body given here
 
-        w = 0x1005;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+3);
-        assert(n[0] == char(0xE1));
-        assert(n[1] == char(0x80));
-        assert(n[2] == char(0x85));
-        assert(n[3] == char(0x83));
+template <class CharT>
+struct TestHelper<CharT, 2> {  // partial specialization for a size argument of 2
+  static void test();  // declared here; the body is written out of line
+};
 
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0xD1));
-        assert(n[1] == char(0x93));
-        assert(n[2] == char(0x85));
-        assert(n[3] == char(0x83));
+template <class CharT>
+struct TestHelper<CharT, 4> {  // partial specialization for a size argument of 4
+  static void test();  // declared here; the body is written out of line
+};
 
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+1);
-        assert(n[0] == char(0x56));
-        assert(n[1] == char(0x93));
-        assert(n[2] == char(0x85));
-        assert(n[3] == char(0x83));
-    }
-    {
-        typedef std::codecvt_utf8<wchar_t, 0x1000> C;
-        C c;
-        wchar_t w = 0x40003;
-        char n[4] = {0};
-        const wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == &w);
-        assert(np == n);
-        assert(n[0] == char(0));
-        assert(n[1] == char(0));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
+template <class CharT>
+void TestHelper<CharT, 2>::test() {  // Checks std::codecvt_utf8<CharT>::out() one character at a time for the size-2 specialization.
+  {  // Case 1: codecvt_utf8<CharT> with only CharT specified.
+    typedef std::codecvt_utf8<CharT> C;
+    C c;
+    CharT w = 0x1005;
+    char n[4] = {0};  // output buffer: zero-filled once, then reused by every call in this case
+    const CharT* wp = nullptr;
+    std::mbstate_t m;  // NOTE(review): declared without an initializer before being passed to out() -- confirm intended
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);  // the single input character was consumed
+    assert(np == n + 3);  // 0x1005 is expected to occupy three bytes
+    assert(n[0] == char(0xE1));
+    assert(n[1] == char(0x80));
+    assert(n[2] == char(0x85));
+    assert(n[3] == char(0));  // still the initial zero
 
-        w = 0x1005;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == &w);
-        assert(np == n);
-        assert(n[0] == char(0));
-        assert(n[1] == char(0));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
+    w = 0x453;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 2);  // 0x453 is expected to occupy two bytes
+    assert(n[0] == char(0xD1));
+    assert(n[1] == char(0x93));
+    assert(n[2] == char(0x85));  // left over from the 0x1005 conversion; n is not reset between calls
+    assert(n[3] == char(0));
 
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0xD1));
-        assert(n[1] == char(0x93));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
+    w = 0x56;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 1);  // 0x56 is expected to occupy one byte
+    assert(n[0] == char(0x56));
+    assert(n[1] == char(0x93));  // left over from the 0x453 conversion
+    assert(n[2] == char(0x85));  // left over from the 0x1005 conversion
+    assert(n[3] == char(0));
+  }
+  {  // Case 2: second template argument (maximum code) set to 0x1000.
+    typedef std::codecvt_utf8<CharT, 0x1000> C;
+    C c;
+    CharT w = 0x1005;
+    char n[4] = {0};  // output buffer: zero-filled once, then reused by every call in this case
+    const CharT* wp = nullptr;
+    std::mbstate_t m;  // NOTE(review): declared without an initializer before being passed to out() -- confirm intended
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::error);  // 0x1005 is above the 0x1000 limit
+    assert(wp == &w);  // no input consumed
+    assert(np == n);  // no output produced
+    assert(n[0] == char(0));
+    assert(n[1] == char(0));
+    assert(n[2] == char(0));
+    assert(n[3] == char(0));
 
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+1);
-        assert(n[0] == char(0x56));
-        assert(n[1] == char(0x93));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
-    }
-    {
-        typedef std::codecvt_utf8<wchar_t, 0xFFFFFFFF, std::generate_header> C;
-        C c;
-        wchar_t w = 0x40003;
-        char n[7] = {0};
-        const wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+7);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xF1));
-        assert(n[4] == char(0x80));
-        assert(n[5] == char(0x80));
-        assert(n[6] == char(0x83));
+    w = 0x453;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);  // 0x453 is within the limit
+    assert(wp == &w + 1);
+    assert(np == n + 2);
+    assert(n[0] == char(0xD1));
+    assert(n[1] == char(0x93));
+    assert(n[2] == char(0));
+    assert(n[3] == char(0));
 
-        w = 0x1005;
-        r = c.out(m, &w, &w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+6);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xE1));
-        assert(n[4] == char(0x80));
-        assert(n[5] == char(0x85));
-        assert(n[6] == char(0x83));
+    w = 0x56;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 1);
+    assert(n[0] == char(0x56));
+    assert(n[1] == char(0x93));  // left over from the 0x453 conversion
+    assert(n[2] == char(0));
+    assert(n[3] == char(0));
+  }
+  {  // Case 3: generate_header mode; expected output is EF BB BF followed by the encoded character.
+    typedef std::codecvt_utf8<CharT, 0xFFFFFFFF, std::generate_header> C;
+    C c;
+    CharT w = 0x1005;
+    char n[7] = {0};  // output buffer: zero-filled once, then reused by every call in this case
+    const CharT* wp = nullptr;
+    std::mbstate_t m;  // NOTE(review): declared without an initializer before being passed to out() -- confirm intended
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, &w, &w + 1, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 6);  // three header bytes plus three bytes for 0x1005
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0xE1));
+    assert(n[4] == char(0x80));
+    assert(n[5] == char(0x85));
+    assert(n[6] == char(0));  // still the initial zero
 
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+5);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xD1));
-        assert(n[4] == char(0x93));
-        assert(n[5] == char(0x85));
-        assert(n[6] == char(0x83));
+    w = 0x453;
+    r = c.out(m, &w, &w + 1, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 5);  // three header bytes plus two bytes for 0x453
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0xD1));
+    assert(n[4] == char(0x93));
+    assert(n[5] == char(0x85));  // left over from the 0x1005 conversion
+    assert(n[6] == char(0));
 
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+4);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0x56));
-        assert(n[4] == char(0x93));
-        assert(n[5] == char(0x85));
-        assert(n[6] == char(0x83));
-    }
-    {
-        typedef std::codecvt_utf8<char32_t> C;
-        C c;
-        char32_t w = 0x40003;
-        char n[4] = {0};
-        const char32_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+4);
-        assert(n[0] == char(0xF1));
-        assert(n[1] == char(0x80));
-        assert(n[2] == char(0x80));
-        assert(n[3] == char(0x83));
-
-        w = 0x1005;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+3);
-        assert(n[0] == char(0xE1));
-        assert(n[1] == char(0x80));
-        assert(n[2] == char(0x85));
-        assert(n[3] == char(0x83));
-
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0xD1));
-        assert(n[1] == char(0x93));
-        assert(n[2] == char(0x85));
-        assert(n[3] == char(0x83));
-
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+1);
-        assert(n[0] == char(0x56));
-        assert(n[1] == char(0x93));
-        assert(n[2] == char(0x85));
-        assert(n[3] == char(0x83));
-    }
-    {
-        typedef std::codecvt_utf8<char32_t, 0x1000> C;
-        C c;
-        char32_t w = 0x40003;
-        char n[4] = {0};
-        const char32_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == &w);
-        assert(np == n);
-        assert(n[0] == char(0));
-        assert(n[1] == char(0));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
-
-        w = 0x1005;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == &w);
-        assert(np == n);
-        assert(n[0] == char(0));
-        assert(n[1] == char(0));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
-
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0xD1));
-        assert(n[1] == char(0x93));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
-
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+1);
-        assert(n[0] == char(0x56));
-        assert(n[1] == char(0x93));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
-    }
-    {
-        typedef std::codecvt_utf8<char32_t, 0xFFFFFFFF, std::generate_header> C;
-        C c;
-        char32_t w = 0x40003;
-        char n[7] = {0};
-        const char32_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+7);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xF1));
-        assert(n[4] == char(0x80));
-        assert(n[5] == char(0x80));
-        assert(n[6] == char(0x83));
-
-        w = 0x1005;
-        r = c.out(m, &w, &w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+6);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xE1));
-        assert(n[4] == char(0x80));
-        assert(n[5] == char(0x85));
-        assert(n[6] == char(0x83));
-
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+5);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xD1));
-        assert(n[4] == char(0x93));
-        assert(n[5] == char(0x85));
-        assert(n[6] == char(0x83));
-
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+4);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0x56));
-        assert(n[4] == char(0x93));
-        assert(n[5] == char(0x85));
-        assert(n[6] == char(0x83));
-    }
-    {
-        typedef std::codecvt_utf8<char16_t> C;
-        C c;
-        char16_t w = 0x1005;
-        char n[4] = {0};
-        const char16_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+3);
-        assert(n[0] == char(0xE1));
-        assert(n[1] == char(0x80));
-        assert(n[2] == char(0x85));
-        assert(n[3] == char(0));
-
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0xD1));
-        assert(n[1] == char(0x93));
-        assert(n[2] == char(0x85));
-        assert(n[3] == char(0));
-
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+1);
-        assert(n[0] == char(0x56));
-        assert(n[1] == char(0x93));
-        assert(n[2] == char(0x85));
-        assert(n[3] == char(0));
-    }
-    {
-        typedef std::codecvt_utf8<char16_t, 0x1000> C;
-        C c;
-        char16_t w = 0x1005;
-        char n[4] = {0};
-        const char16_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == &w);
-        assert(np == n);
-        assert(n[0] == char(0));
-        assert(n[1] == char(0));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
-
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0xD1));
-        assert(n[1] == char(0x93));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
-
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+1);
-        assert(n[0] == char(0x56));
-        assert(n[1] == char(0x93));
-        assert(n[2] == char(0));
-        assert(n[3] == char(0));
-    }
-    {
-        typedef std::codecvt_utf8<char16_t, 0xFFFFFFFF, std::generate_header> C;
-        C c;
-        char16_t w = 0x1005;
-        char n[7] = {0};
-        const char16_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+6);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xE1));
-        assert(n[4] == char(0x80));
-        assert(n[5] == char(0x85));
-        assert(n[6] == char(0));
-
-        w = 0x453;
-        r = c.out(m, &w, &w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+5);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xD1));
-        assert(n[4] == char(0x93));
-        assert(n[5] == char(0x85));
-        assert(n[6] == char(0));
-
-        w = 0x56;
-        r = c.out(m, &w, &w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == &w+1);
-        assert(np == n+4);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0x56));
-        assert(n[4] == char(0x93));
-        assert(n[5] == char(0x85));
-        assert(n[6] == char(0));
-    }
+    w = 0x56;
+    r = c.out(m, &w, &w + 1, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 4);  // three header bytes plus one byte for 0x56
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0x56));
+    assert(n[4] == char(0x93));  // left over from the 0x453 conversion
+    assert(n[5] == char(0x85));  // left over from the 0x1005 conversion
+    assert(n[6] == char(0));
+  }
+}
+
+template <class CharT>
+void TestHelper<CharT, 4>::test() {  // Checks std::codecvt_utf8<CharT>::out() one character at a time for the size-4 specialization.
+  {  // Case 1: codecvt_utf8<CharT> with only CharT specified.
+    typedef std::codecvt_utf8<CharT> C;
+    C c;
+    CharT w = 0x40003;
+    char n[4] = {0};  // output buffer: zero-filled once, then reused by every call in this case
+    const CharT* wp = nullptr;
+    std::mbstate_t m;  // NOTE(review): declared without an initializer before being passed to out() -- confirm intended
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);  // the single input character was consumed
+    assert(np == n + 4);  // 0x40003 is expected to occupy four bytes
+    assert(n[0] == char(0xF1));
+    assert(n[1] == char(0x80));
+    assert(n[2] == char(0x80));
+    assert(n[3] == char(0x83));
+
+    w = 0x1005;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 3);  // 0x1005 is expected to occupy three bytes
+    assert(n[0] == char(0xE1));
+    assert(n[1] == char(0x80));
+    assert(n[2] == char(0x85));
+    assert(n[3] == char(0x83));  // left over from the 0x40003 conversion; n is not reset between calls
+
+    w = 0x453;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 2);  // 0x453 is expected to occupy two bytes
+    assert(n[0] == char(0xD1));
+    assert(n[1] == char(0x93));
+    assert(n[2] == char(0x85));  // left over from the 0x1005 conversion
+    assert(n[3] == char(0x83));  // left over from the 0x40003 conversion
+
+    w = 0x56;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 1);  // 0x56 is expected to occupy one byte
+    assert(n[0] == char(0x56));
+    assert(n[1] == char(0x93));  // left over from the 0x453 conversion
+    assert(n[2] == char(0x85));
+    assert(n[3] == char(0x83));
+  }
+  {  // Case 2: second template argument (maximum code) set to 0x1000.
+    typedef std::codecvt_utf8<CharT, 0x1000> C;
+    C c;
+    CharT w = 0x40003;
+    char n[4] = {0};  // output buffer: zero-filled once, then reused by every call in this case
+    const CharT* wp = nullptr;
+    std::mbstate_t m;  // NOTE(review): declared without an initializer before being passed to out() -- confirm intended
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::error);  // 0x40003 is above the 0x1000 limit
+    assert(wp == &w);  // no input consumed
+    assert(np == n);  // no output produced
+    assert(n[0] == char(0));
+    assert(n[1] == char(0));
+    assert(n[2] == char(0));
+    assert(n[3] == char(0));
+
+    w = 0x1005;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::error);  // 0x1005 is also above the 0x1000 limit
+    assert(wp == &w);
+    assert(np == n);
+    assert(n[0] == char(0));
+    assert(n[1] == char(0));
+    assert(n[2] == char(0));
+    assert(n[3] == char(0));
+
+    w = 0x453;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);  // 0x453 is within the limit
+    assert(wp == &w + 1);
+    assert(np == n + 2);
+    assert(n[0] == char(0xD1));
+    assert(n[1] == char(0x93));
+    assert(n[2] == char(0));
+    assert(n[3] == char(0));
+
+    w = 0x56;
+    r = c.out(m, &w, &w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 1);
+    assert(n[0] == char(0x56));
+    assert(n[1] == char(0x93));  // left over from the 0x453 conversion
+    assert(n[2] == char(0));
+    assert(n[3] == char(0));
+  }
+  {  // Case 3: generate_header mode; expected output is EF BB BF followed by the encoded character.
+    typedef std::codecvt_utf8<CharT, 0xFFFFFFFF, std::generate_header> C;
+    C c;
+    CharT w = 0x40003;
+    char n[7] = {0};  // output buffer: zero-filled once, then reused by every call in this case
+    const CharT* wp = nullptr;
+    std::mbstate_t m;  // NOTE(review): declared without an initializer before being passed to out() -- confirm intended
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, &w, &w + 1, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 7);  // three header bytes plus four bytes for 0x40003
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0xF1));
+    assert(n[4] == char(0x80));
+    assert(n[5] == char(0x80));
+    assert(n[6] == char(0x83));
+
+    w = 0x1005;
+    r = c.out(m, &w, &w + 1, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 6);  // three header bytes plus three bytes for 0x1005
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0xE1));
+    assert(n[4] == char(0x80));
+    assert(n[5] == char(0x85));
+    assert(n[6] == char(0x83));  // left over from the 0x40003 conversion
+
+    w = 0x453;
+    r = c.out(m, &w, &w + 1, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 5);  // three header bytes plus two bytes for 0x453
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0xD1));
+    assert(n[4] == char(0x93));
+    assert(n[5] == char(0x85));  // left over from the 0x1005 conversion
+    assert(n[6] == char(0x83));
+
+    w = 0x56;
+    r = c.out(m, &w, &w + 1, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == &w + 1);
+    assert(np == n + 4);  // three header bytes plus one byte for 0x56
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0x56));
+    assert(n[4] == char(0x93));  // left over from the 0x453 conversion
+    assert(n[5] == char(0x85));
+    assert(n[6] == char(0x83));
+  }
+}
+
+int main() {  // Entry point: runs TestHelper<...>::test() once per character type below.
+  TestHelper<wchar_t>::test();  // specialization is chosen by sizeof(wchar_t) through the defaulted size argument
+  TestHelper<char32_t>::test();
+  TestHelper<char16_t>::test();
 }
diff --git a/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp b/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp
index 0cd941764aa0..392d66f2298c 100644
--- a/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp
+++ b/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp
@@ -25,348 +25,220 @@
 #include <codecvt>
 #include <cassert>
 
-int main()
-{
-    {
-        typedef std::codecvt_utf8_utf16<wchar_t> C;
-        C c;
-        wchar_t w[2] = {0};
-        char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
-        wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        const char* np = nullptr;
-        std::codecvt_base::result r = c.in(m, n, n+4, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+2);
-        assert(np == n+4);
-        assert(w[0] == 0xD8C0);
-        assert(w[1] == 0xDC03);
+template <class CharT, size_t = sizeof(CharT)>  // second argument defaults to sizeof(CharT), so TestHelper<T> selects a specialization by size
+struct TestHelper;  // primary template: declared only, no body given here
+template <class CharT>
+struct TestHelper<CharT, 2> {  // partial specialization for a size argument of 2
+  static void test();  // declared here; the body is written out of line
+};
+template <class CharT>
+struct TestHelper<CharT, 4> {  // partial specialization for a size argument of 4
+  static void test();  // declaration only at this point
+};
 
-        n[0] = char(0xE1);
-        n[1] = char(0x80);
-        n[2] = char(0x85);
-        r = c.in(m, n, n+3, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+3);
-        assert(w[0] == 0x1005);
+template <class CharT>
+void TestHelper<CharT, 2>::test() {
+  {
+    typedef std::codecvt_utf8_utf16<CharT, 0x1000> C;
+    C c;
+    CharT w[2] = {0};
+    char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
+    CharT* wp = nullptr;
+    std::mbstate_t m;
+    const char* np = nullptr;
+    std::codecvt_base::result r = c.in(m, n, n + 4, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::error);
+    assert(wp == w);
+    assert(np == n);
 
-        n[0] = char(0xD1);
-        n[1] = char(0x93);
-        r = c.in(m, n, n+2, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(w[0] == 0x0453);
+    n[0] = char(0xE1);
+    n[1] = char(0x80);
+    n[2] = char(0x85);
+    r = c.in(m, n, n + 3, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::error);
+    assert(wp == w);
+    assert(np == n);
 
-        n[0] = char(0x56);
-        r = c.in(m, n, n+1, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(w[0] == 0x0056);
-    }
-    {
-        typedef std::codecvt_utf8_utf16<wchar_t, 0x1000> C;
-        C c;
-        wchar_t w[2] = {0};
-        char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
-        wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        const char* np = nullptr;
-        std::codecvt_base::result r = c.in(m, n, n+4, np, w, w+2, wp);
-        assert(r == std::codecvt_base::error);
-        assert(wp == w);
-        assert(np == n);
+    n[0] = char(0xD1);
+    n[1] = char(0x93);
+    r = c.in(m, n, n + 2, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 2);
+    assert(w[0] == 0x0453);
 
-        n[0] = char(0xE1);
-        n[1] = char(0x80);
-        n[2] = char(0x85);
-        r = c.in(m, n, n+3, np, w, w+2, wp);
-        assert(r == std::codecvt_base::error);
-        assert(wp == w);
-        assert(np == n);
+    n[0] = char(0x56);
+    r = c.in(m, n, n + 1, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 1);
+    assert(w[0] == 0x0056);
+  }
+  {
+    typedef std::codecvt_utf8_utf16<CharT, 0x10ffff, std::consume_header> C;
+    C c;
+    CharT w[2] = {0};
+    char n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1),
+                 char(0x80), char(0x80), char(0x83)};
+    CharT* wp = nullptr;
+    std::mbstate_t m;
+    const char* np = nullptr;
+    std::codecvt_base::result r = c.in(m, n, n + 7, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 2);
+    assert(np == n + 7);
+    assert(w[0] == 0xD8C0);
+    assert(w[1] == 0xDC03);
 
-        n[0] = char(0xD1);
-        n[1] = char(0x93);
-        r = c.in(m, n, n+2, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(w[0] == 0x0453);
+    n[0] = char(0xE1);
+    n[1] = char(0x80);
+    n[2] = char(0x85);
+    r = c.in(m, n, n + 3, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 3);
+    assert(w[0] == 0x1005);
 
-        n[0] = char(0x56);
-        r = c.in(m, n, n+1, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(w[0] == 0x0056);
-    }
-    {
-        typedef std::codecvt_utf8_utf16<wchar_t, 0x10ffff, std::consume_header> C;
-        C c;
-        wchar_t w[2] = {0};
-        char n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1), char(0x80), char(0x80), char(0x83)};
-        wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        const char* np = nullptr;
-        std::codecvt_base::result r = c.in(m, n, n+7, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+2);
-        assert(np == n+7);
-        assert(w[0] == 0xD8C0);
-        assert(w[1] == 0xDC03);
+    n[0] = char(0xD1);
+    n[1] = char(0x93);
+    r = c.in(m, n, n + 2, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 2);
+    assert(w[0] == 0x0453);
 
-        n[0] = char(0xE1);
-        n[1] = char(0x80);
-        n[2] = char(0x85);
-        r = c.in(m, n, n+3, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+3);
-        assert(w[0] == 0x1005);
-
-        n[0] = char(0xD1);
-        n[1] = char(0x93);
-        r = c.in(m, n, n+2, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(w[0] == 0x0453);
-
-        n[0] = char(0x56);
-        r = c.in(m, n, n+1, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(w[0] == 0x0056);
-    }
-    {
-        typedef std::codecvt_utf8_utf16<char32_t> C;
-        C c;
-        char32_t w[2] = {0};
-        char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
-        char32_t* wp = nullptr;
-        std::mbstate_t m;
-        const char* np = nullptr;
-        std::codecvt_base::result r = c.in(m, n, n+4, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+2);
-        assert(np == n+4);
-        assert(w[0] == 0xD8C0);
-        assert(w[1] == 0xDC03);
-
-        n[0] = char(0xE1);
-        n[1] = char(0x80);
-        n[2] = char(0x85);
-        r = c.in(m, n, n+3, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+3);
-        assert(w[0] == 0x1005);
-
-        n[0] = char(0xD1);
-        n[1] = char(0x93);
-        r = c.in(m, n, n+2, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(w[0] == 0x0453);
-
-        n[0] = char(0x56);
-        r = c.in(m, n, n+1, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(w[0] == 0x0056);
-    }
-    {
-        typedef std::codecvt_utf8_utf16<char32_t, 0x1000> C;
-        C c;
-        char32_t w[2] = {0};
-        char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
-        char32_t* wp = nullptr;
-        std::mbstate_t m;
-        const char* np = nullptr;
-        std::codecvt_base::result r = c.in(m, n, n+4, np, w, w+2, wp);
-        assert(r == std::codecvt_base::error);
-        assert(wp == w);
-        assert(np == n);
-
-        n[0] = char(0xE1);
-        n[1] = char(0x80);
-        n[2] = char(0x85);
-        r = c.in(m, n, n+3, np, w, w+2, wp);
-        assert(r == std::codecvt_base::error);
-        assert(wp == w);
-        assert(np == n);
-
-        n[0] = char(0xD1);
-        n[1] = char(0x93);
-        r = c.in(m, n, n+2, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(w[0] == 0x0453);
-
-        n[0] = char(0x56);
-        r = c.in(m, n, n+1, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(w[0] == 0x0056);
-    }
-    {
-        typedef std::codecvt_utf8_utf16<char32_t, 0x10ffff, std::consume_header> C;
-        C c;
-        char32_t w[2] = {0};
-        char n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1), char(0x80), char(0x80), char(0x83)};
-        char32_t* wp = nullptr;
-        std::mbstate_t m;
-        const char* np = nullptr;
-        std::codecvt_base::result r = c.in(m, n, n+7, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+2);
-        assert(np == n+7);
-        assert(w[0] == 0xD8C0);
-        assert(w[1] == 0xDC03);
-
-        n[0] = char(0xE1);
-        n[1] = char(0x80);
-        n[2] = char(0x85);
-        r = c.in(m, n, n+3, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+3);
-        assert(w[0] == 0x1005);
-
-        n[0] = char(0xD1);
-        n[1] = char(0x93);
-        r = c.in(m, n, n+2, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(w[0] == 0x0453);
-
-        n[0] = char(0x56);
-        r = c.in(m, n, n+1, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(w[0] == 0x0056);
-    }
-    {
-        typedef std::codecvt_utf8_utf16<char16_t> C;
-        C c;
-        char16_t w[2] = {0};
-        char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
-        char16_t* wp = nullptr;
-        std::mbstate_t m;
-        const char* np = nullptr;
-        std::codecvt_base::result r = c.in(m, n, n+4, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+2);
-        assert(np == n+4);
-        assert(w[0] == 0xD8C0);
-        assert(w[1] == 0xDC03);
-
-        n[0] = char(0xE1);
-        n[1] = char(0x80);
-        n[2] = char(0x85);
-        r = c.in(m, n, n+3, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+3);
-        assert(w[0] == 0x1005);
-
-        n[0] = char(0xD1);
-        n[1] = char(0x93);
-        r = c.in(m, n, n+2, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(w[0] == 0x0453);
-
-        n[0] = char(0x56);
-        r = c.in(m, n, n+1, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(w[0] == 0x0056);
-    }
-    {
-        typedef std::codecvt_utf8_utf16<char16_t, 0x1000> C;
-        C c;
-        char16_t w[2] = {0};
-        char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
-        char16_t* wp = nullptr;
-        std::mbstate_t m;
-        const char* np = nullptr;
-        std::codecvt_base::result r = c.in(m, n, n+4, np, w, w+2, wp);
-        assert(r == std::codecvt_base::error);
-        assert(wp == w);
-        assert(np == n);
-
-        n[0] = char(0xE1);
-        n[1] = char(0x80);
-        n[2] = char(0x85);
-        r = c.in(m, n, n+3, np, w, w+2, wp);
-        assert(r == std::codecvt_base::error);
-        assert(wp == w);
-        assert(np == n);
-
-        n[0] = char(0xD1);
-        n[1] = char(0x93);
-        r = c.in(m, n, n+2, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(w[0] == 0x0453);
-
-        n[0] = char(0x56);
-        r = c.in(m, n, n+1, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(w[0] == 0x0056);
-    }
-    {
-        typedef std::codecvt_utf8_utf16<char16_t, 0x10ffff, std::consume_header> C;
-        C c;
-        char16_t w[2] = {0};
-        char n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1), char(0x80), char(0x80), char(0x83)};
-        char16_t* wp = nullptr;
-        std::mbstate_t m;
-        const char* np = nullptr;
-        std::codecvt_base::result r = c.in(m, n, n+7, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+2);
-        assert(np == n+7);
-        assert(w[0] == 0xD8C0);
-        assert(w[1] == 0xDC03);
-
-        n[0] = char(0xE1);
-        n[1] = char(0x80);
-        n[2] = char(0x85);
-        r = c.in(m, n, n+3, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+3);
-        assert(w[0] == 0x1005);
-
-        n[0] = char(0xD1);
-        n[1] = char(0x93);
-        r = c.in(m, n, n+2, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(w[0] == 0x0453);
-
-        n[0] = char(0x56);
-        r = c.in(m, n, n+1, np, w, w+2, wp);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(w[0] == 0x0056);
-    }
+    n[0] = char(0x56);
+    r = c.in(m, n, n + 1, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 1);
+    assert(w[0] == 0x0056);
+  }
+}
+
+template <class CharT>
+void TestHelper<CharT, 4>::test() {  // codecvt_utf8_utf16::in() checks for the TestHelper<CharT, 4> specialization.
+  {  // Default template arguments: every input below is expected to convert with result ok.
+    typedef std::codecvt_utf8_utf16<CharT> C;
+    C c;
+    CharT w[2] = {0};
+    char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
+    CharT* wp = nullptr;
+    std::mbstate_t m;
+    const char* np = nullptr;
+    std::codecvt_base::result r = c.in(m, n, n + 4, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 2);  // The 4-byte input is expected to yield two output units.
+    assert(np == n + 4);
+    assert(w[0] == 0xD8C0);
+    assert(w[1] == 0xDC03);
+
+    n[0] = char(0xE1);  // 3-byte input; expected output unit is 0x1005.
+    n[1] = char(0x80);
+    n[2] = char(0x85);
+    r = c.in(m, n, n + 3, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 3);
+    assert(w[0] == 0x1005);
+
+    n[0] = char(0xD1);  // 2-byte input; expected output unit is 0x0453.
+    n[1] = char(0x93);
+    r = c.in(m, n, n + 2, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 2);
+    assert(w[0] == 0x0453);
+
+    n[0] = char(0x56);  // 1-byte input; expected output unit is 0x0056.
+    r = c.in(m, n, n + 1, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 1);
+    assert(w[0] == 0x0056);
+  }
+  {  // Second template argument 0x1000: the 4-byte and 3-byte inputs are expected to fail with error.
+    typedef std::codecvt_utf8_utf16<CharT, 0x1000> C;
+    C c;
+    CharT w[2] = {0};
+    char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
+    CharT* wp = nullptr;
+    std::mbstate_t m;
+    const char* np = nullptr;
+    std::codecvt_base::result r = c.in(m, n, n + 4, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::error);
+    assert(wp == w);  // On error the output pointer is expected to stay at the start.
+    assert(np == n);  // On error the input pointer is expected to stay at the start.
+
+    n[0] = char(0xE1);
+    n[1] = char(0x80);
+    n[2] = char(0x85);
+    r = c.in(m, n, n + 3, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::error);
+    assert(wp == w);
+    assert(np == n);
+
+    n[0] = char(0xD1);  // The 2-byte and 1-byte inputs are still expected to convert with ok.
+    n[1] = char(0x93);
+    r = c.in(m, n, n + 2, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 2);
+    assert(w[0] == 0x0453);
+
+    n[0] = char(0x56);
+    r = c.in(m, n, n + 1, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 1);
+    assert(w[0] == 0x0056);
+  }
+  {  // consume_header mode: the first input starts with EF BB BF ahead of the 4-byte sequence.
+    typedef std::codecvt_utf8_utf16<CharT, 0x10ffff, std::consume_header> C;
+    C c;
+    CharT w[2] = {0};
+    char n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1),
+                 char(0x80), char(0x80), char(0x83)};
+    CharT* wp = nullptr;
+    std::mbstate_t m;
+    const char* np = nullptr;
+    std::codecvt_base::result r = c.in(m, n, n + 7, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 2);  // Only two output units are expected for the 7 input bytes.
+    assert(np == n + 7);  // All 7 input bytes are expected to be consumed.
+    assert(w[0] == 0xD8C0);
+    assert(w[1] == 0xDC03);
+
+    n[0] = char(0xE1);  // The remaining inputs overwrite the start of n, so they carry no EF BB BF prefix.
+    n[1] = char(0x80);
+    n[2] = char(0x85);
+    r = c.in(m, n, n + 3, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 3);
+    assert(w[0] == 0x1005);
+
+    n[0] = char(0xD1);
+    n[1] = char(0x93);
+    r = c.in(m, n, n + 2, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 2);
+    assert(w[0] == 0x0453);
+
+    n[0] = char(0x56);
+    r = c.in(m, n, n + 1, np, w, w + 2, wp);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 1);
+    assert(w[0] == 0x0056);
+  }
+}
+
+int main() {  // Runs the in() checks once per character type.
+#ifndef _WIN32  // The wchar_t run is compiled out when _WIN32 is defined.
+  TestHelper<wchar_t>::test();
+#endif  // _WIN32
+  TestHelper<char32_t>::test();
+  TestHelper<char16_t>::test();
 }
diff --git a/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp b/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp
index 29c5342e4545..ced2a36a4fec 100644
--- a/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp
+++ b/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp
@@ -25,391 +25,285 @@
 #include <codecvt>
 #include <cassert>
 
-int main()
-{
-    {
-        typedef std::codecvt_utf8_utf16<wchar_t> C;
-        C c;
-        wchar_t w[2] = {0xD8C0, 0xDC03};
-        char n[4] = {0};
-        const wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+2);
-        assert(np == n+4);
-        assert(n[0] == char(0xF1));
-        assert(n[1] == char(0x80));
-        assert(n[2] == char(0x80));
-        assert(n[3] == char(0x83));
+template <class CharT, size_t = sizeof(CharT)>
+struct TestHelper;  // Primary template is declared only; its second argument defaults to sizeof(CharT).
+template <class CharT>
+struct TestHelper<CharT, 2> {  // Specialization chosen when the size argument is 2.
+  static void test();
+};
+template <class CharT>
+struct TestHelper<CharT, 4> {  // Specialization chosen when the size argument is 4.
+  static void test();
+};
 
-        w[0] = 0x1005;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+3);
-        assert(n[0] == char(0xE1));
-        assert(n[1] == char(0x80));
-        assert(n[2] == char(0x85));
+template <class CharT>
+void TestHelper<CharT, 2>::test() {  // codecvt_utf8_utf16::out() checks for CharT whose sizeof is 2.
+  {  // Default template arguments: every input below is expected to convert with result ok.
+    typedef std::codecvt_utf8_utf16<CharT> C;
+    C c;
+    CharT w[2] = {0xD8C0, 0xDC03};
+    char n[4] = {0};
+    const CharT* wp = nullptr;
+    std::mbstate_t m;
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, w, w + 2, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 2);
+    assert(np == n + 4);
+    assert(n[0] == char(0xF1));
+    assert(n[1] == char(0x80));
+    assert(n[2] == char(0x80));
+    assert(n[3] == char(0x83));
 
-        w[0] = 0x453;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0xD1));
-        assert(n[1] == char(0x93));
+    w[0] = 0x1005;
+    r = c.out(m, w, w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 3);
+    assert(n[0] == char(0xE1));
+    assert(n[1] == char(0x80));
+    assert(n[2] == char(0x85));
 
-        w[0] = 0x56;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(n[0] == char(0x56));
-    }
-    {
-        typedef std::codecvt_utf8_utf16<wchar_t, 0x1000> C;
-        C c;
-        wchar_t w[2] = {0xD8C0, 0xDC03};
-        char n[4] = {0};
-        const wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == w);
-        assert(np == n);
+    w[0] = 0x453;
+    r = c.out(m, w, w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 2);
+    assert(n[0] == char(0xD1));
+    assert(n[1] == char(0x93));
 
-        w[0] = 0x1005;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == w);
-        assert(np == n);
+    w[0] = 0x56;
+    r = c.out(m, w, w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 1);
+    assert(n[0] == char(0x56));
+  }
+  {  // Second template argument 0x1000: the two-unit input and 0x1005 are expected to fail with error.
+    typedef std::codecvt_utf8_utf16<CharT, 0x1000> C;
+    C c;
+    CharT w[2] = {0xD8C0, 0xDC03};
+    char n[4] = {0};
+    const CharT* wp = nullptr;
+    std::mbstate_t m;
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, w, w + 2, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::error);
+    assert(wp == w);
+    assert(np == n);
 
-        w[0] = 0x453;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0xD1));
-        assert(n[1] == char(0x93));
+    w[0] = 0x1005;
+    r = c.out(m, w, w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::error);
+    assert(wp == w);
+    assert(np == n);
 
-        w[0] = 0x56;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(n[0] == char(0x56));
-    }
-    {
-        typedef std::codecvt_utf8_utf16<wchar_t, 0x10ffff, std::generate_header> C;
-        C c;
-        wchar_t w[2] = {0xD8C0, 0xDC03};
-        char n[7] = {0};
-        const wchar_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+2);
-        assert(np == n+7);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xF1));
-        assert(n[4] == char(0x80));
-        assert(n[5] == char(0x80));
-        assert(n[6] == char(0x83));
+    w[0] = 0x453;
+    r = c.out(m, w, w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 2);
+    assert(n[0] == char(0xD1));
+    assert(n[1] == char(0x93));
 
-        w[0] = 0x1005;
-        r = c.out(m, w, w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+6);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xE1));
-        assert(n[4] == char(0x80));
-        assert(n[5] == char(0x85));
+    w[0] = 0x56;
+    r = c.out(m, w, w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 1);
+    assert(n[0] == char(0x56));
+  }
+  {  // generate_header mode: each out() call below is expected to emit EF BB BF before the converted bytes.
+    typedef std::codecvt_utf8_utf16<CharT, 0x10ffff, std::generate_header> C;
+    C c;
+    CharT w[2] = {0xD8C0, 0xDC03};
+    char n[7] = {0};
+    const CharT* wp = nullptr;
+    std::mbstate_t m;
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, w, w + 2, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 2);
+    assert(np == n + 7);
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0xF1));
+    assert(n[4] == char(0x80));
+    assert(n[5] == char(0x80));
+    assert(n[6] == char(0x83));
 
-        w[0] = 0x453;
-        r = c.out(m, w, w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+5);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xD1));
-        assert(n[4] == char(0x93));
+    w[0] = 0x1005;
+    r = c.out(m, w, w + 1, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 6);
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0xE1));
+    assert(n[4] == char(0x80));
+    assert(n[5] == char(0x85));
 
-        w[0] = 0x56;
-        r = c.out(m, w, w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+4);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0x56));
-    }
-    {
-        typedef std::codecvt_utf8_utf16<char32_t> C;
-        C c;
-        char32_t w[2] = {0xD8C0, 0xDC03};
-        char n[4] = {0};
-        const char32_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+2);
-        assert(np == n+4);
-        assert(n[0] == char(0xF1));
-        assert(n[1] == char(0x80));
-        assert(n[2] == char(0x80));
-        assert(n[3] == char(0x83));
+    w[0] = 0x453;
+    r = c.out(m, w, w + 1, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 5);
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0xD1));
+    assert(n[4] == char(0x93));
 
-        w[0] = 0x1005;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+3);
-        assert(n[0] == char(0xE1));
-        assert(n[1] == char(0x80));
-        assert(n[2] == char(0x85));
-
-        w[0] = 0x453;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0xD1));
-        assert(n[1] == char(0x93));
-
-        w[0] = 0x56;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(n[0] == char(0x56));
-    }
-    {
-        typedef std::codecvt_utf8_utf16<char32_t, 0x1000> C;
-        C c;
-        char32_t w[2] = {0xD8C0, 0xDC03};
-        char n[4] = {0};
-        const char32_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == w);
-        assert(np == n);
-
-        w[0] = 0x1005;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == w);
-        assert(np == n);
-
-        w[0] = 0x453;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0xD1));
-        assert(n[1] == char(0x93));
-
-        w[0] = 0x56;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(n[0] == char(0x56));
-    }
-    {
-        typedef std::codecvt_utf8_utf16<char32_t, 0x10ffff, std::generate_header> C;
-        C c;
-        char32_t w[2] = {0xD8C0, 0xDC03};
-        char n[7] = {0};
-        const char32_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+2);
-        assert(np == n+7);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xF1));
-        assert(n[4] == char(0x80));
-        assert(n[5] == char(0x80));
-        assert(n[6] == char(0x83));
-
-        w[0] = 0x1005;
-        r = c.out(m, w, w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+6);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xE1));
-        assert(n[4] == char(0x80));
-        assert(n[5] == char(0x85));
-
-        w[0] = 0x453;
-        r = c.out(m, w, w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+5);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xD1));
-        assert(n[4] == char(0x93));
-
-        w[0] = 0x56;
-        r = c.out(m, w, w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+4);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0x56));
-    }
-
-    {
-        typedef std::codecvt_utf8_utf16<char16_t> C;
-        C c;
-        char16_t w[2] = {0xD8C0, 0xDC03};
-        char n[4] = {0};
-        const char16_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+2);
-        assert(np == n+4);
-        assert(n[0] == char(0xF1));
-        assert(n[1] == char(0x80));
-        assert(n[2] == char(0x80));
-        assert(n[3] == char(0x83));
-
-        w[0] = 0x1005;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+3);
-        assert(n[0] == char(0xE1));
-        assert(n[1] == char(0x80));
-        assert(n[2] == char(0x85));
-
-        w[0] = 0x453;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0xD1));
-        assert(n[1] == char(0x93));
-
-        w[0] = 0x56;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(n[0] == char(0x56));
-    }
-    {
-        typedef std::codecvt_utf8_utf16<char16_t, 0x1000> C;
-        C c;
-        char16_t w[2] = {0xD8C0, 0xDC03};
-        char n[4] = {0};
-        const char16_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == w);
-        assert(np == n);
-
-        w[0] = 0x1005;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::error);
-        assert(wp == w);
-        assert(np == n);
-
-        w[0] = 0x453;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+2);
-        assert(n[0] == char(0xD1));
-        assert(n[1] == char(0x93));
-
-        w[0] = 0x56;
-        r = c.out(m, w, w+1, wp, n, n+4, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+1);
-        assert(n[0] == char(0x56));
-    }
-    {
-        typedef std::codecvt_utf8_utf16<char16_t, 0x10ffff, std::generate_header> C;
-        C c;
-        char16_t w[2] = {0xD8C0, 0xDC03};
-        char n[7] = {0};
-        const char16_t* wp = nullptr;
-        std::mbstate_t m;
-        char* np = nullptr;
-        std::codecvt_base::result r = c.out(m, w, w+2, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+2);
-        assert(np == n+7);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xF1));
-        assert(n[4] == char(0x80));
-        assert(n[5] == char(0x80));
-        assert(n[6] == char(0x83));
-
-        w[0] = 0x1005;
-        r = c.out(m, w, w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+6);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xE1));
-        assert(n[4] == char(0x80));
-        assert(n[5] == char(0x85));
-
-        w[0] = 0x453;
-        r = c.out(m, w, w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+5);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0xD1));
-        assert(n[4] == char(0x93));
-
-        w[0] = 0x56;
-        r = c.out(m, w, w+1, wp, n, n+7, np);
-        assert(r == std::codecvt_base::ok);
-        assert(wp == w+1);
-        assert(np == n+4);
-        assert(n[0] == char(0xEF));
-        assert(n[1] == char(0xBB));
-        assert(n[2] == char(0xBF));
-        assert(n[3] == char(0x56));
-    }
+    w[0] = 0x56;
+    r = c.out(m, w, w + 1, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 4);
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0x56));
+  }
+}
+
+template <class CharT>
+void TestHelper<CharT, 4>::test() {  // codecvt_utf8_utf16::out() checks for CharT whose sizeof is 4.
+  {  // Default template arguments: every input below is expected to convert with result ok.
+    typedef std::codecvt_utf8_utf16<CharT> C;
+    C c;
+    CharT w[2] = {0xD8C0, 0xDC03};
+    char n[4] = {0};
+    const CharT* wp = nullptr;
+    std::mbstate_t m;
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, w, w + 2, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 2);  // Both input units are expected to be consumed.
+    assert(np == n + 4);  // The two-unit input is expected to produce 4 bytes.
+    assert(n[0] == char(0xF1));
+    assert(n[1] == char(0x80));
+    assert(n[2] == char(0x80));
+    assert(n[3] == char(0x83));
+
+    w[0] = 0x1005;  // Single unit, expected to produce 3 bytes.
+    r = c.out(m, w, w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 3);
+    assert(n[0] == char(0xE1));
+    assert(n[1] == char(0x80));
+    assert(n[2] == char(0x85));
+
+    w[0] = 0x453;  // Single unit, expected to produce 2 bytes.
+    r = c.out(m, w, w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 2);
+    assert(n[0] == char(0xD1));
+    assert(n[1] == char(0x93));
+
+    w[0] = 0x56;  // Single unit, expected to produce 1 byte.
+    r = c.out(m, w, w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 1);
+    assert(n[0] == char(0x56));
+  }
+  {  // Second template argument 0x1000: the two-unit input and 0x1005 are expected to fail with error.
+    typedef std::codecvt_utf8_utf16<CharT, 0x1000> C;
+    C c;
+    CharT w[2] = {0xD8C0, 0xDC03};
+    char n[4] = {0};
+    const CharT* wp = nullptr;
+    std::mbstate_t m;
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, w, w + 2, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::error);
+    assert(wp == w);  // On error the input pointer is expected to stay at the start.
+    assert(np == n);  // On error the output pointer is expected to stay at the start.
+
+    w[0] = 0x1005;
+    r = c.out(m, w, w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::error);
+    assert(wp == w);
+    assert(np == n);
+
+    w[0] = 0x453;  // 0x453 and 0x56 are still expected to convert with ok.
+    r = c.out(m, w, w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 2);
+    assert(n[0] == char(0xD1));
+    assert(n[1] == char(0x93));
+
+    w[0] = 0x56;
+    r = c.out(m, w, w + 1, wp, n, n + 4, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 1);
+    assert(n[0] == char(0x56));
+  }
+  {  // generate_header mode: each out() call below is expected to emit EF BB BF before the converted bytes.
+    typedef std::codecvt_utf8_utf16<CharT, 0x10ffff, std::generate_header> C;
+    C c;
+    CharT w[2] = {0xD8C0, 0xDC03};
+    char n[7] = {0};
+    const CharT* wp = nullptr;
+    std::mbstate_t m;
+    char* np = nullptr;
+    std::codecvt_base::result r = c.out(m, w, w + 2, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 2);
+    assert(np == n + 7);  // 3 header bytes plus 4 converted bytes.
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0xF1));
+    assert(n[4] == char(0x80));
+    assert(n[5] == char(0x80));
+    assert(n[6] == char(0x83));
+
+    w[0] = 0x1005;
+    r = c.out(m, w, w + 1, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 6);  // 3 header bytes plus 3 converted bytes.
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0xE1));
+    assert(n[4] == char(0x80));
+    assert(n[5] == char(0x85));
+
+    w[0] = 0x453;
+    r = c.out(m, w, w + 1, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 5);  // 3 header bytes plus 2 converted bytes.
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0xD1));
+    assert(n[4] == char(0x93));
+
+    w[0] = 0x56;
+    r = c.out(m, w, w + 1, wp, n, n + 7, np);
+    assert(r == std::codecvt_base::ok);
+    assert(wp == w + 1);
+    assert(np == n + 4);  // 3 header bytes plus 1 converted byte.
+    assert(n[0] == char(0xEF));
+    assert(n[1] == char(0xBB));
+    assert(n[2] == char(0xBF));
+    assert(n[3] == char(0x56));
+  }
+}
+
+int main() {  // Runs the out() checks once per character type.
+#ifndef _WIN32  // The wchar_t run is compiled out when _WIN32 is defined.
+  TestHelper<wchar_t>::test();
+#endif  // _WIN32
+  TestHelper<char32_t>::test();
+  TestHelper<char16_t>::test();
 }
diff --git a/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp b/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp
index 06df185757d2..480628f707e2 100644
--- a/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp
+++ b/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp
@@ -17,9 +17,39 @@
 #include <codecvt>
 #include <cassert>
 
-int main()
-{
-    typedef std::codecvt_utf8<wchar_t> Codecvt;
+template <class CharT, size_t = sizeof(CharT)>  // primary template: picks a helper by sizeof(CharT)
+struct TestHelper;
+template <class CharT>
+struct TestHelper<CharT, 2> {  // selected when CharT is 2 bytes wide
+  static void test();
+};
+template <class CharT>
+struct TestHelper<CharT, 4> {  // selected when CharT is 4 bytes wide
+  static void test();
+};
+
+template <class CharT>
+void TestHelper<CharT, 2>::test() {  // checks wstring_convert::converted() when wchar_t is 2 bytes
+  static_assert((std::is_same<CharT, wchar_t>::value), "");  // helper is only meant for wchar_t
+  {
+    typedef std::codecvt_utf8<CharT> Codecvt;
+    typedef std::wstring_convert<Codecvt> Myconv;
+    Myconv myconv;
+    assert(myconv.converted() == 0);  // nothing has been converted yet
+    std::string bs = myconv.to_bytes(L"\x1005");
+    assert(myconv.converted() == 1);  // one wide character consumed
+    bs = myconv.to_bytes(L"\x1005\x65");
+    assert(myconv.converted() == 2);  // two wide characters consumed
+    std::wstring ws = myconv.from_bytes("\xE1\x80\x85");
+    assert(myconv.converted() == 3);  // three UTF-8 bytes (U+1005) consumed
+  }
+}
+
+template <class CharT>
+void TestHelper<CharT, 4>::test() {
+  static_assert((std::is_same<CharT, wchar_t>::value), "");
+  {
+    typedef std::codecvt_utf8<CharT> Codecvt;
     typedef std::wstring_convert<Codecvt> Myconv;
     Myconv myconv;
     assert(myconv.converted() == 0);
@@ -29,4 +59,7 @@ int main()
     assert(myconv.converted() == 2);
     std::wstring ws = myconv.from_bytes("\xF1\x80\x80\x83");
     assert(myconv.converted() == 4);
+  }
 }
+
+int main() { TestHelper<wchar_t>::test(); }  // helper specialization is chosen by sizeof(wchar_t)
diff --git a/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp b/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp
index 8705a5f3bced..2e627b739928 100644
--- a/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp
+++ b/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp
@@ -20,20 +20,53 @@
 #include <codecvt>
 #include <cassert>
 
-int main()
-{
-    {
-        std::wstring_convert<std::codecvt_utf8<wchar_t> > myconv;
-        std::string bs("\xF1\x80\x80\x83");
-        std::wstring ws = myconv.from_bytes('a');
-        assert(ws == L"a");
-        ws = myconv.from_bytes(bs.c_str());
-        assert(ws == L"\x40003");
-        ws = myconv.from_bytes(bs);
-        assert(ws == L"\x40003");
-        ws = myconv.from_bytes(bs.data(), bs.data() + bs.size());
-        assert(ws == L"\x40003");
-        ws = myconv.from_bytes("");
-        assert(ws.size() == 0);
-    }
+template <class CharT, size_t = sizeof(CharT)>  // primary template: picks a helper by sizeof(CharT)
+struct TestHelper;
+template <class CharT>
+struct TestHelper<CharT, 2> {  // selected when CharT is 2 bytes wide
+  static void test();
+};
+template <class CharT>
+struct TestHelper<CharT, 4> {  // selected when CharT is 4 bytes wide
+  static void test();
+};
+
+template <class CharT>
+void TestHelper<CharT, 2>::test() {  // exercises each from_bytes overload when wchar_t is 2 bytes
+  static_assert((std::is_same<CharT, wchar_t>::value), "");  // helper is only meant for wchar_t
+  {
+    std::wstring_convert<std::codecvt_utf8<CharT> > myconv;
+    std::string bs("\xE1\x80\x85\x00");  // UTF-8 for U+1005; the const char* ctor stops at the first NUL
+    std::wstring ws = myconv.from_bytes('a');  // single-byte overload
+    assert(ws == L"a");
+    ws = myconv.from_bytes(bs.c_str());  // NUL-terminated C-string overload
+    assert(ws == L"\x1005");
+    ws = myconv.from_bytes(bs);  // std::string overload
+    assert(ws == L"\x1005");
+    ws = myconv.from_bytes(bs.data(), bs.data() + bs.size());  // [first, last) pointer-range overload
+    assert(ws == L"\x1005");
+    ws = myconv.from_bytes("");  // empty input must give an empty result
+    assert(ws.size() == 0);
+  }
 }
+
+template <class CharT>
+void TestHelper<CharT, 4>::test() {  // exercises each from_bytes overload when wchar_t is 4 bytes
+  static_assert((std::is_same<CharT, wchar_t>::value), "");  // helper is only meant for wchar_t
+  {
+    std::wstring_convert<std::codecvt_utf8<CharT> > myconv;
+    std::string bs("\xF1\x80\x80\x83");  // four-byte UTF-8 sequence for U+40003
+    std::wstring ws = myconv.from_bytes('a');  // single-byte overload
+    assert(ws == L"a");
+    ws = myconv.from_bytes(bs.c_str());  // NUL-terminated C-string overload
+    assert(ws == L"\x40003");
+    ws = myconv.from_bytes(bs);  // std::string overload
+    assert(ws == L"\x40003");
+    ws = myconv.from_bytes(bs.data(), bs.data() + bs.size());  // [first, last) pointer-range overload
+    assert(ws == L"\x40003");
+    ws = myconv.from_bytes("");  // empty input must give an empty result
+    assert(ws.size() == 0);
+  }
+}
+
+int main() { TestHelper<wchar_t>::test(); }  // helper specialization is chosen by sizeof(wchar_t)
diff --git a/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp b/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp
index 7253a18a70b1..0a6cab73bf9e 100644
--- a/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp
+++ b/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp
@@ -20,20 +20,53 @@
 #include <codecvt>
 #include <cassert>
 
-int main()
-{
-    {
-        std::wstring_convert<std::codecvt_utf8<wchar_t> > myconv;
-        std::wstring ws(1, L'\x40003');
-        std::string bs = myconv.to_bytes(ws[0]);
-        assert(bs == "\xF1\x80\x80\x83");
-        bs = myconv.to_bytes(ws.c_str());
-        assert(bs == "\xF1\x80\x80\x83");
-        bs = myconv.to_bytes(ws);
-        assert(bs == "\xF1\x80\x80\x83");
-        bs = myconv.to_bytes(ws.data(), ws.data() + ws.size());
-        assert(bs == "\xF1\x80\x80\x83");
-        bs = myconv.to_bytes(L"");
-        assert(bs.size() == 0);
-    }
+template <class CharT, size_t = sizeof(CharT)>  // primary template: picks a helper by sizeof(CharT)
+struct TestHelper;
+template <class CharT>
+struct TestHelper<CharT, 2> {  // selected when CharT is 2 bytes wide
+  static void test();
+};
+template <class CharT>
+struct TestHelper<CharT, 4> {  // selected when CharT is 4 bytes wide
+  static void test();
+};
+
+template <class CharT>
+void TestHelper<CharT, 2>::test() {  // exercises each to_bytes overload when wchar_t is 2 bytes
+  static_assert((std::is_same<CharT, wchar_t>::value), "");  // helper is only meant for wchar_t
+  {
+    std::wstring_convert<std::codecvt_utf8<CharT> > myconv;
+    std::wstring ws(1, CharT(0x1005));  // a single wide character, U+1005
+    std::string bs = myconv.to_bytes(ws[0]);  // single-element overload
+    assert(bs == "\xE1\x80\x85\x00");  // compared as a C string, so the explicit \x00 only terminates it
+    bs = myconv.to_bytes(ws.c_str());  // NUL-terminated wide C-string overload
+    assert(bs == "\xE1\x80\x85\x00");
+    bs = myconv.to_bytes(ws);  // std::wstring overload
+    assert(bs == "\xE1\x80\x85\x00");
+    bs = myconv.to_bytes(ws.data(), ws.data() + ws.size());  // [first, last) pointer-range overload
+    assert(bs == "\xE1\x80\x85\x00");
+    bs = myconv.to_bytes(L"");  // empty input must give an empty result
+    assert(bs.size() == 0);
+  }
 }
+
+template <class CharT>
+void TestHelper<CharT, 4>::test() {  // exercises each to_bytes overload when wchar_t is 4 bytes
+  static_assert((std::is_same<CharT, wchar_t>::value), "");  // helper is only meant for wchar_t
+  {
+    std::wstring_convert<std::codecvt_utf8<CharT> > myconv;
+    std::wstring ws(1, CharT(0x40003));  // a single wide character, U+40003
+    std::string bs = myconv.to_bytes(ws[0]);  // single-element overload
+    assert(bs == "\xF1\x80\x80\x83");  // four-byte UTF-8 encoding of U+40003
+    bs = myconv.to_bytes(ws.c_str());  // NUL-terminated wide C-string overload
+    assert(bs == "\xF1\x80\x80\x83");
+    bs = myconv.to_bytes(ws);  // std::wstring overload
+    assert(bs == "\xF1\x80\x80\x83");
+    bs = myconv.to_bytes(ws.data(), ws.data() + ws.size());  // [first, last) pointer-range overload
+    assert(bs == "\xF1\x80\x80\x83");
+    bs = myconv.to_bytes(L"");  // empty input must give an empty result
+    assert(bs.size() == 0);
+  }
+}
+
+int main() { TestHelper<wchar_t>::test(); }  // helper specialization is chosen by sizeof(wchar_t)
diff --git a/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp b/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
index 961b515ef8d8..517a62a0068f 100644
--- a/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
+++ b/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
@@ -16,8 +16,10 @@
 
 #include <numeric>
 #include <cassert>
+#include <climits>
+#include <cstdint>
 #include <cstdlib>    // for rand()
-#include <iostream>
+#include <type_traits>
 
 constexpr struct {
   int x;
@@ -36,21 +38,24 @@ constexpr struct {
 
 
 template <typename Input1, typename Input2, typename Output>
-constexpr bool test0(Input1 in1, Input2 in2, Output out)
+constexpr bool test0(int in1, int in2, int out)  // inputs arrive as int and are cast to the types under test
 {
-    static_assert((std::is_same<Output, decltype(std::gcd(in1, in2))>::value), "" );
-    static_assert((std::is_same<Output, decltype(std::gcd(in2, in1))>::value), "" );
-    return out == std::gcd(in1, in2) ? true : (std::abort(), false);
+    auto value1 = static_cast<Input1>(in1);  // first operand, converted to Input1
+    auto value2 = static_cast<Input2>(in2);  // second operand, converted to Input2
+    static_assert(std::is_same_v<Output, decltype(std::gcd(value1, value2))>, "");  // result type must be Output...
+    static_assert(std::is_same_v<Output, decltype(std::gcd(value2, value1))>, "");  // ...in either argument order
+    assert(static_cast<Output>(out) == std::gcd(value1, value2));  // value check against the expected result
+    return true;  // reached only if the assert above held
 }
 
 
 template <typename Input1, typename Input2 = Input1>
 constexpr bool do_test(int = 0)
 {
-    using S1 = typename std::make_signed<Input1>::type;
-    using S2 = typename std::make_signed<Input2>::type;
-    using U1 = typename std::make_unsigned<Input1>::type;
-    using U2 = typename std::make_unsigned<Input2>::type;
+    using S1 = std::make_signed_t<Input1>;
+    using S2 = std::make_signed_t<Input2>;
+    using U1 = std::make_unsigned_t<Input1>;
+    using U2 = std::make_unsigned_t<Input2>;
     bool accumulate = true;
     for (auto TC : Cases) {
         { // Test with two signed types
@@ -103,15 +108,15 @@ int main()
     assert(do_test<long>(non_cce));
     assert(do_test<long long>(non_cce));
 
-    static_assert(do_test< int8_t>(), "");
-    static_assert(do_test<int16_t>(), "");
-    static_assert(do_test<int32_t>(), "");
-    static_assert(do_test<int64_t>(), "");
+    static_assert(do_test<std::int8_t>(), "");
+    static_assert(do_test<std::int16_t>(), "");
+    static_assert(do_test<std::int32_t>(), "");
+    static_assert(do_test<std::int64_t>(), "");
 
-    assert(do_test< int8_t>(non_cce));
-    assert(do_test<int16_t>(non_cce));
-    assert(do_test<int32_t>(non_cce));
-    assert(do_test<int64_t>(non_cce));
+    assert(do_test<std::int8_t>(non_cce));
+    assert(do_test<std::int16_t>(non_cce));
+    assert(do_test<std::int32_t>(non_cce));
+    assert(do_test<std::int64_t>(non_cce));
 
     static_assert(do_test<signed char, int>(), "");
     static_assert(do_test<int, signed char>(), "");
@@ -133,8 +138,8 @@ int main()
 
 //  LWG#2837
     {
-        auto res = std::gcd((int64_t)1234, (int32_t)-2147483648);
-        static_assert( std::is_same<decltype(res), std::common_type<int64_t, int32_t>::type>::value, "");
-        assert(res == 2);
+    auto res = std::gcd(static_cast<std::int64_t>(1234), INT32_MIN);
+    static_assert(std::is_same_v<decltype(res), std::int64_t>, "");
+    assert(res == 2);
     }
 }
diff --git a/test/std/numerics/numeric.ops/numeric.ops.lcm/lcm.pass.cpp b/test/std/numerics/numeric.ops/numeric.ops.lcm/lcm.pass.cpp
index 90d48398f54a..6bd8a4f1e610 100644
--- a/test/std/numerics/numeric.ops/numeric.ops.lcm/lcm.pass.cpp
+++ b/test/std/numerics/numeric.ops/numeric.ops.lcm/lcm.pass.cpp
@@ -11,12 +11,14 @@
 // <numeric>
 
 // template<class _M, class _N>
-// constexpr common_type_t<_M,_N> gcd(_M __m, _N __n)
+// constexpr common_type_t<_M,_N> lcm(_M __m, _N __n)
 
 #include <numeric>
 #include <cassert>
+#include <climits>
+#include <cstdint>
 #include <cstdlib>
-#include <iostream>
+#include <type_traits>
 
 constexpr struct {
   int x;
@@ -34,21 +36,24 @@ constexpr struct {
 };
 
 template <typename Input1, typename Input2, typename Output>
-constexpr bool test0(Input1 in1, Input2 in2, Output out)
+constexpr bool test0(int in1, int in2, int out)  // inputs arrive as int and are cast to the types under test
 {
-    static_assert((std::is_same<Output, decltype(std::lcm(Input1(0), Input2(0)))>::value), "" );
-    static_assert((std::is_same<Output, decltype(std::lcm(Input2(0), Input1(0)))>::value), "" );
-    return out == std::lcm(in1, in2) ? true : (std::abort(), false);
+    auto value1 = static_cast<Input1>(in1);  // first operand, converted to Input1
+    auto value2 = static_cast<Input2>(in2);  // second operand, converted to Input2
+    static_assert(std::is_same_v<Output, decltype(std::lcm(value1, value2))>, "");  // result type must be Output...
+    static_assert(std::is_same_v<Output, decltype(std::lcm(value2, value1))>, "");  // ...in either argument order
+    assert(static_cast<Output>(out) == std::lcm(value1, value2));  // value check against the expected result
+    return true;  // reached only if the assert above held
 }
 
 
 template <typename Input1, typename Input2 = Input1>
 constexpr bool do_test(int = 0)
 {
-    using S1 = typename std::make_signed<Input1>::type;
-    using S2 = typename std::make_signed<Input2>::type;
-    using U1 = typename std::make_unsigned<Input1>::type;
-    using U2 = typename std::make_unsigned<Input2>::type;
+    using S1 = std::make_signed_t<Input1>;
+    using S2 = std::make_signed_t<Input2>;
+    using U1 = std::make_unsigned_t<Input1>;
+    using U2 = std::make_unsigned_t<Input2>;
     bool accumulate = true;
     for (auto TC : Cases) {
         { // Test with two signed types
@@ -101,15 +106,15 @@ int main()
     assert(do_test<long>(non_cce));
     assert(do_test<long long>(non_cce));
 
-    static_assert(do_test< int8_t>(), "");
-    static_assert(do_test<int16_t>(), "");
-    static_assert(do_test<int32_t>(), "");
-    static_assert(do_test<int64_t>(), "");
+    static_assert(do_test<std::int8_t>(), "");
+    static_assert(do_test<std::int16_t>(), "");
+    static_assert(do_test<std::int32_t>(), "");
+    static_assert(do_test<std::int64_t>(), "");
 
-    assert(do_test< int8_t>(non_cce));
-    assert(do_test<int16_t>(non_cce));
-    assert(do_test<int32_t>(non_cce));
-    assert(do_test<int64_t>(non_cce));
+    assert(do_test<std::int8_t>(non_cce));
+    assert(do_test<std::int16_t>(non_cce));
+    assert(do_test<std::int32_t>(non_cce));
+    assert(do_test<std::int64_t>(non_cce));
 
     static_assert(do_test<signed char, int>(), "");
     static_assert(do_test<int, signed char>(), "");
@@ -131,9 +136,9 @@ int main()
 
 //  LWG#2837
     {
-    auto res1 = std::lcm((int64_t)1234, (int32_t)-2147483648);
-    (void) std::lcm<int, unsigned long>(INT_MIN, 2);	// this used to trigger UBSAN
-    static_assert( std::is_same<decltype(res1), std::common_type<int64_t, int32_t>::type>::value, "");
-	assert(res1 == 1324997410816LL);
+    auto res1 = std::lcm(static_cast<std::int64_t>(1234), INT32_MIN);
+    (void)std::lcm(INT_MIN, 2UL);	// this used to trigger UBSAN
+    static_assert(std::is_same_v<decltype(res1), std::int64_t>, "");
+    assert(res1 == 1324997410816LL);
     }
 }
diff --git a/test/std/re/re.traits/lookup_classname.pass.cpp b/test/std/re/re.traits/lookup_classname.pass.cpp
index 2215b9043d5b..b61f772b2df8 100644
--- a/test/std/re/re.traits/lookup_classname.pass.cpp
+++ b/test/std/re/re.traits/lookup_classname.pass.cpp
@@ -27,39 +27,61 @@ test(const char_type* A,
      typename std::regex_traits<char_type>::char_class_type expected,
      bool icase = false)
 {
+    typedef typename std::regex_traits<char_type>::char_class_type char_class_type;
     std::regex_traits<char_type> t;
     typedef forward_iterator<const char_type*> F;
-    assert(t.lookup_classname(F(A), F(A + t.length(A)), icase) == expected);
+    char_class_type result = t.lookup_classname(F(A), F(A + t.length(A)), icase);
+    assert(result == expected);
+}
+
+template <class char_type>
+void
+test_w(const char_type* A,  // class name to look up; checks the result allowing extra bits beyond `expected`
+       typename std::regex_traits<char_type>::char_class_type expected,
+        bool icase = false)
+{
+    typedef typename std::regex_traits<char_type>::char_class_type char_class_type;
+    std::regex_traits<char_type> t;
+    typedef forward_iterator<const char_type*> F;
+    char_class_type result = t.lookup_classname(F(A), F(A + t.length(A)), icase);
+    assert((result & expected) == expected);  // every expected class bit must be present in the result
+    LIBCPP_ASSERT((expected | std::regex_traits<char_type>::__regex_word) == result);  // NOTE(review): presumably libc++-only check - result is exactly expected plus __regex_word
+
+    const bool matches_underscore = t.isctype('_', result);
+    if (result != expected)  // extra bits were returned: they must make '_' match
+      assert(matches_underscore && "expected to match underscore");
+    else  // no extra bits: '_' must not match
+      assert(!matches_underscore && "should not match underscore");
 }
 
 int main()
 {
 //  if __regex_word is not distinct from all the classes, bad things happen
 //  See https://bugs.llvm.org/show_bug.cgi?id=26476 for an example.
-    assert((std::ctype_base::space  & std::regex_traits<char>::__regex_word) == 0);
-    assert((std::ctype_base::print  & std::regex_traits<char>::__regex_word) == 0);
-    assert((std::ctype_base::cntrl  & std::regex_traits<char>::__regex_word) == 0);
-    assert((std::ctype_base::upper  & std::regex_traits<char>::__regex_word) == 0);
-    assert((std::ctype_base::lower  & std::regex_traits<char>::__regex_word) == 0);
-    assert((std::ctype_base::alpha  & std::regex_traits<char>::__regex_word) == 0);
-    assert((std::ctype_base::digit  & std::regex_traits<char>::__regex_word) == 0);
-    assert((std::ctype_base::punct  & std::regex_traits<char>::__regex_word) == 0);
-    assert((std::ctype_base::xdigit & std::regex_traits<char>::__regex_word) == 0);
-    assert((std::ctype_base::blank  & std::regex_traits<char>::__regex_word) == 0);
+    LIBCPP_ASSERT((std::ctype_base::space  & std::regex_traits<char>::__regex_word) == 0);
+    LIBCPP_ASSERT((std::ctype_base::print  & std::regex_traits<char>::__regex_word) == 0);
+    LIBCPP_ASSERT((std::ctype_base::cntrl  & std::regex_traits<char>::__regex_word) == 0);
+    LIBCPP_ASSERT((std::ctype_base::upper  & std::regex_traits<char>::__regex_word) == 0);
+    LIBCPP_ASSERT((std::ctype_base::lower  & std::regex_traits<char>::__regex_word) == 0);
+    LIBCPP_ASSERT((std::ctype_base::alpha  & std::regex_traits<char>::__regex_word) == 0);
+    LIBCPP_ASSERT((std::ctype_base::digit  & std::regex_traits<char>::__regex_word) == 0);
+    LIBCPP_ASSERT((std::ctype_base::punct  & std::regex_traits<char>::__regex_word) == 0);
+    LIBCPP_ASSERT((std::ctype_base::xdigit & std::regex_traits<char>::__regex_word) == 0);
+    LIBCPP_ASSERT((std::ctype_base::blank  & std::regex_traits<char>::__regex_word) == 0);
 
     test("d", std::ctype_base::digit);
     test("D", std::ctype_base::digit);
     test("d", std::ctype_base::digit, true);
     test("D", std::ctype_base::digit, true);
 
-    test("w", std::regex_traits<char>::__regex_word | std::ctype_base::alnum
-                      | std::ctype_base::upper | std::ctype_base::lower);
-    test("W", std::regex_traits<char>::__regex_word | std::ctype_base::alnum
-                      | std::ctype_base::upper | std::ctype_base::lower);
-    test("w", std::regex_traits<char>::__regex_word | std::ctype_base::alnum
-                      | std::ctype_base::upper | std::ctype_base::lower, true);
-    test("W", std::regex_traits<char>::__regex_word | std::ctype_base::alnum
-                      | std::ctype_base::upper | std::ctype_base::lower, true);
+    test_w("w", std::ctype_base::alnum
+              | std::ctype_base::upper | std::ctype_base::lower);
+    test_w("W", std::ctype_base::alnum
+              | std::ctype_base::upper | std::ctype_base::lower);
+    test_w("w", std::ctype_base::alnum
+              | std::ctype_base::upper | std::ctype_base::lower, true);
+    test_w("W", std::ctype_base::alnum
+              | std::ctype_base::upper | std::ctype_base::lower, true);
 
     test("s", std::ctype_base::space);
     test("S", std::ctype_base::space);
@@ -140,13 +162,13 @@ int main()
     test(L"d", std::ctype_base::digit, true);
     test(L"D", std::ctype_base::digit, true);
 
-    test(L"w", std::regex_traits<wchar_t>::__regex_word | std::ctype_base::alnum
+    test_w(L"w", std::ctype_base::alnum
                       | std::ctype_base::upper | std::ctype_base::lower);
-    test(L"W", std::regex_traits<wchar_t>::__regex_word | std::ctype_base::alnum
+    test_w(L"W", std::ctype_base::alnum
                       | std::ctype_base::upper | std::ctype_base::lower);
-    test(L"w", std::regex_traits<wchar_t>::__regex_word | std::ctype_base::alnum
+    test_w(L"w", std::ctype_base::alnum
                       | std::ctype_base::upper | std::ctype_base::lower, true);
-    test(L"W", std::regex_traits<wchar_t>::__regex_word | std::ctype_base::alnum
+    test_w(L"W", std::ctype_base::alnum
                       | std::ctype_base::upper | std::ctype_base::lower, true);
 
     test(L"s", std::ctype_base::space);
diff --git a/test/std/strings/basic.string/string.cons/T_size_size.pass.cpp b/test/std/strings/basic.string/string.cons/T_size_size.pass.cpp
index c8d14a1d0ddf..67ac43494a6e 100644
--- a/test/std/strings/basic.string/string.cons/T_size_size.pass.cpp
+++ b/test/std/strings/basic.string/string.cons/T_size_size.pass.cpp
@@ -27,16 +27,17 @@
 
 template <class S, class SV>
 void
-test(SV sv, unsigned pos, unsigned n)
+test(SV sv, std::size_t pos, std::size_t n)
 {
     typedef typename S::traits_type T;
     typedef typename S::allocator_type A;
+    typedef typename S::size_type Size;
     if (pos <= sv.size())
     {
-        S s2(sv, pos, n);
+        S s2(sv, static_cast<Size>(pos), static_cast<Size>(n));
         LIBCPP_ASSERT(s2.__invariants());
         assert(pos <= sv.size());
-        unsigned rlen = std::min<unsigned>(sv.size() - pos, n);
+        std::size_t rlen = std::min(sv.size() - pos, n);
         assert(s2.size() == rlen);
         assert(T::compare(s2.data(), sv.data() + pos, rlen) == 0);
         assert(s2.get_allocator() == A());
@@ -47,7 +48,7 @@ test(SV sv, unsigned pos, unsigned n)
     {
         try
         {
-            S s2(sv, pos, n);
+            S s2(sv, static_cast<Size>(pos), static_cast<Size>(n));
             assert(false);
         }
         catch (std::out_of_range&)
@@ -60,15 +61,16 @@ test(SV sv, unsigned pos, unsigned n)
 
 template <class S, class SV>
 void
-test(SV sv, unsigned pos, unsigned n, const typename S::allocator_type& a)
+test(SV sv, std::size_t pos, std::size_t n, const typename S::allocator_type& a)
 {
     typedef typename S::traits_type T;
+    typedef typename S::size_type Size;
     if (pos <= sv.size())
     {
-        S s2(sv, pos, n, a);
+        S s2(sv, static_cast<Size>(pos), static_cast<Size>(n), a);
         LIBCPP_ASSERT(s2.__invariants());
         assert(pos <= sv.size());
-        unsigned rlen = std::min<unsigned>(sv.size() - pos, n);
+        std::size_t rlen = std::min(sv.size() - pos, n);
         assert(s2.size() == rlen);
         assert(T::compare(s2.data(), sv.data() + pos, rlen) == 0);
         assert(s2.get_allocator() == a);
@@ -79,7 +81,7 @@ test(SV sv, unsigned pos, unsigned n, const typename S::allocator_type& a)
     {
         try
         {
-            S s2(sv, pos, n, a);
+            S s2(sv, static_cast<Size>(pos), static_cast<Size>(n), a);
             assert(false);
         }
         catch (std::out_of_range&)
diff --git a/test/std/strings/string.view/string.view.modifiers/clear.pass.cpp b/test/std/strings/string.view/string.view.modifiers/clear.pass.cpp
deleted file mode 100644
index c1137e80acaf..000000000000
--- a/test/std/strings/string.view/string.view.modifiers/clear.pass.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-// <string_view>
-
-// void clear() noexcept
-
-#include <string_view>
-#include <cassert>
-
-#include "test_macros.h"
-
-template<typename CharT>
-void test ( const CharT *s, size_t len ) {
-    typedef std::basic_string_view<CharT> SV;
-    {
-    SV sv1 ( s );
-    assert ( sv1.size() == len );
-    assert ( sv1.data() == s );
-
-    sv1.clear ();
-    assert ( sv1.data() == nullptr );
-    assert ( sv1.size() == 0 );
-    assert ( sv1 == SV());
-    }
-}
-
-#if TEST_STD_VER > 11
-constexpr size_t test_ce ( size_t n ) {
-    typedef std::basic_string_view<char> SV;
-    SV sv1{ "ABCDEFGHIJKL", n };
-    sv1.clear();
-    return sv1.size();
-}
-#endif
-
-int main () {
-    test ( "ABCDE", 5 );
-    test ( "a", 1 );
-    test ( "", 0 );
-
-    test ( L"ABCDE", 5 );
-    test ( L"a", 1 );
-    test ( L"", 0 );
-
-#if TEST_STD_VER >= 11
-    test ( u"ABCDE", 5 );
-    test ( u"a", 1 );
-    test ( u"", 0 );
-
-    test ( U"ABCDE", 5 );
-    test ( U"a", 1 );
-    test ( U"", 0 );
-#endif
-
-#if TEST_STD_VER > 11
-    static_assert ( test_ce (5) == 0, "" );
-#endif
-
-}
diff --git a/test/std/thread/thread.threads/thread.thread.this/sleep_for_tested_elsewhere.pass.cpp b/test/std/thread/thread.threads/thread.thread.this/sleep_for_tested_elsewhere.pass.cpp
new file mode 100644
index 000000000000..3406fff70610
--- /dev/null
+++ b/test/std/thread/thread.threads/thread.thread.this/sleep_for_tested_elsewhere.pass.cpp
@@ -0,0 +1,22 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// <thread>
+
+// template <class Rep, class Period>
+//   void sleep_for(const chrono::duration<Rep, Period>& rel_time);
+
+// The std::this_thread::sleep_for test requires POSIX specific headers and
+// is therefore non-standard. For this reason the test lives under the 'libcxx'
+// subdirectory.
+
+int main()
+{  // intentionally empty: the actual sleep_for test lives under the 'libcxx' subdirectory
+}
diff --git a/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/allocs.pass.cpp b/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/allocs.pass.cpp
index 0aef2af85ccf..2aa19c6188a5 100644
--- a/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/allocs.pass.cpp
+++ b/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/allocs.pass.cpp
@@ -107,5 +107,11 @@ int main()
         assert((a.inner_allocator() ==
             std::scoped_allocator_adaptor<A2<int>, A3<int>>(A2<int>(5), A3<int>(6))));
     }
-
+//  Test for LWG2782
+    {
+        static_assert(!std::is_convertible<A1<int>, A2<int>>::value, "");
+        static_assert(!std::is_convertible<
+             std::scoped_allocator_adaptor<A1<int>>, 
+             std::scoped_allocator_adaptor<A2<int>>>::value, "");
+    }
 }
diff --git a/test/std/utilities/function.objects/func.not_fn/not_fn.pass.cpp b/test/std/utilities/function.objects/func.not_fn/not_fn.pass.cpp
index 2b19211be9bf..aa8eb3916db9 100644
--- a/test/std/utilities/function.objects/func.not_fn/not_fn.pass.cpp
+++ b/test/std/utilities/function.objects/func.not_fn/not_fn.pass.cpp
@@ -416,7 +416,7 @@ void throws_in_constructor_test()
       ThrowsOnCopy() = default;
       bool operator()() const {
         assert(false);
-#if defined(_LIBCPP_MSVC)
+#if defined(TEST_COMPILER_C1XX)
         __assume(0);
 #else
         __builtin_unreachable();
diff --git a/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp b/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp
index 9d5681a3db76..7516b2e3af2b 100644
--- a/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp
+++ b/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp
@@ -11,7 +11,8 @@
 
 // class function<R(ArgTypes...)>
 
-// function(const function& f);
+// function(const function&  f);
+// function(function&& f);
 
 #include <functional>
 #include <memory>
diff --git a/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target.pass.cpp b/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target.pass.cpp
index 53476a274735..7a4678ad1cae 100644
--- a/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target.pass.cpp
+++ b/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target.pass.cpp
@@ -63,6 +63,7 @@ int main()
     assert(A::count == 1);
     assert(f.target<A>());
     assert(f.target<int(*)(int)>() == 0);
+    assert(f.target<int>() == nullptr);
     }
     assert(A::count == 0);
     {
@@ -70,6 +71,7 @@ int main()
     assert(A::count == 0);
     assert(f.target<int(*)(int)>());
     assert(f.target<A>() == 0);
+    assert(f.target<int>() == nullptr);
     }
     assert(A::count == 0);
     {
@@ -77,6 +79,7 @@ int main()
     assert(A::count == 1);
     assert(f.target<A>());
     assert(f.target<int(*)(int)>() == 0);
+    assert(f.target<int>() == nullptr);
     }
     assert(A::count == 0);
     {
@@ -84,6 +87,7 @@ int main()
     assert(A::count == 0);
     assert(f.target<int(*)(int)>());
     assert(f.target<A>() == 0);
+    assert(f.target<int>() == nullptr);
     }
     assert(A::count == 0);
 }
diff --git a/test/std/utilities/memory/util.smartptr/util.smartptr.enab/enable_shared_from_this.pass.cpp b/test/std/utilities/memory/util.smartptr/util.smartptr.enab/enable_shared_from_this.pass.cpp
index 5a0d9259c11d..eb0d0a955fc5 100644
--- a/test/std/utilities/memory/util.smartptr/util.smartptr.enab/enable_shared_from_this.pass.cpp
+++ b/test/std/utilities/memory/util.smartptr/util.smartptr.enab/enable_shared_from_this.pass.cpp
@@ -49,6 +49,10 @@ struct Bar : public Foo {
 };
 
 
+struct PrivateBase : private std::enable_shared_from_this<PrivateBase> {  // enable_shared_from_this is an inaccessible (private) base
+};
+
+
 int main()
 {
     {  // https://bugs.llvm.org/show_bug.cgi?id=18843
@@ -74,6 +78,12 @@ int main()
     assert(p == q);
     assert(!p.owner_before(q) && !q.owner_before(p)); // p and q share ownership
     }
+    {
+      typedef std::shared_ptr<PrivateBase> APtr;
+      // make_shared must still work when enable_shared_from_this is a private base.
+      APtr a1 = std::make_shared<PrivateBase>();
+      assert(a1.use_count() == 1);
+    }
     // Test LWG issue 2529. Only reset '__weak_ptr_' when it's already expired.
     // http://cplusplus.github.io/LWG/lwg-active.html#2529.
     // Test two different ways:
diff --git a/test/std/utilities/meta/meta.rel/is_convertible.pass.cpp b/test/std/utilities/meta/meta.rel/is_convertible.pass.cpp
index 552c16075da7..20c9eca8e521 100644
--- a/test/std/utilities/meta/meta.rel/is_convertible.pass.cpp
+++ b/test/std/utilities/meta/meta.rel/is_convertible.pass.cpp
@@ -120,7 +120,9 @@ int main()
     static_assert((!std::is_convertible<ConstFunction, Function>::value), "");
     static_assert((!std::is_convertible<ConstFunction, Function*>::value), "");
     static_assert((!std::is_convertible<ConstFunction, Function&>::value), "");
-    static_assert((!std::is_convertible<ConstFunction, Function>::value), "");
+#if TEST_STD_VER >= 11
+    static_assert((!std::is_convertible<ConstFunction, Function&&>::value), "");
+#endif
     static_assert((!std::is_convertible<Function*, ConstFunction>::value), "");
     static_assert((!std::is_convertible<Function&, ConstFunction>::value), "");
     static_assert((!std::is_convertible<ConstFunction, ConstFunction>::value), "");
diff --git a/test/std/utilities/meta/meta.unary/meta.unary.prop/is_trivially_copyable.pass.cpp b/test/std/utilities/meta/meta.unary/meta.unary.prop/is_trivially_copyable.pass.cpp
index 42ecdb3b896a..0bb373c96620 100644
--- a/test/std/utilities/meta/meta.unary/meta.unary.prop/is_trivially_copyable.pass.cpp
+++ b/test/std/utilities/meta/meta.unary/meta.unary.prop/is_trivially_copyable.pass.cpp
@@ -11,7 +11,9 @@
 
 // is_trivially_copyable
 
-// XFAIL: gcc-4.9
+// These compilers have not implemented Core 2094 which makes volatile
+// qualified types trivially copyable.
+// XFAIL: clang-3, clang-4, apple-clang, gcc
 
 #include <type_traits>
 #include <cassert>
@@ -22,13 +24,13 @@ void test_is_trivially_copyable()
 {
     static_assert( std::is_trivially_copyable<T>::value, "");
     static_assert( std::is_trivially_copyable<const T>::value, "");
-    static_assert(!std::is_trivially_copyable<volatile T>::value, "");
-    static_assert(!std::is_trivially_copyable<const volatile T>::value, "");
+    static_assert( std::is_trivially_copyable<volatile T>::value, "");
+    static_assert( std::is_trivially_copyable<const volatile T>::value, "");
 #if TEST_STD_VER > 14
     static_assert( std::is_trivially_copyable_v<T>, "");
     static_assert( std::is_trivially_copyable_v<const T>, "");
-    static_assert(!std::is_trivially_copyable_v<volatile T>, "");
-    static_assert(!std::is_trivially_copyable_v<const volatile T>, "");
+    static_assert( std::is_trivially_copyable_v<volatile T>, "");
+    static_assert( std::is_trivially_copyable_v<const volatile T>, "");
 #endif
 }
 
diff --git a/test/std/utilities/template.bitset/includes.pass.cpp b/test/std/utilities/template.bitset/includes.pass.cpp
index 2e3c2812e441..e640a1b5b7ed 100644
--- a/test/std/utilities/template.bitset/includes.pass.cpp
+++ b/test/std/utilities/template.bitset/includes.pass.cpp
@@ -11,22 +11,27 @@
 
 #include <bitset>
 
-#ifndef _LIBCPP_CSTDDEF
-#error <cstddef> has not been included
-#endif
-
-#ifndef _LIBCPP_STRING
-#error <string> has not been included
-#endif
-
-#ifndef _LIBCPP_STDEXCEPT
-#error <stdexcept> has not been included
-#endif
-
-#ifndef _LIBCPP_IOSFWD
-#error <iosfwd> has not been included
-#endif
+template <class> void test_typedef() {}
 
 int main()
 {
+  { // test for <cstddef>
+    std::ptrdiff_t p; ((void)p);
+    std::size_t s; ((void)s);
+    std::nullptr_t np; ((void)np);
+  }
+  { // test for <string>
+    std::string s; ((void)s);
+  }
+  { // test for <stdexcept>
+    std::logic_error le("blah"); ((void)le);
+    std::runtime_error re("blah"); ((void)re);
+  }
+  { // test for <iosfwd>
+    test_typedef<std::ios>();
+    test_typedef<std::wios>();
+    test_typedef<std::istream>();
+    test_typedef<std::ostream>();
+    test_typedef<std::iostream>();
+  }
 }
diff --git a/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/dtor.pass.cpp b/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/dtor.pass.cpp
index fbcda44e4065..d7b184f6383c 100644
--- a/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/dtor.pass.cpp
+++ b/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/dtor.pass.cpp
@@ -15,6 +15,10 @@
 
 // ~tuple();
 
+// C++17 added:
+//   The destructor of tuple shall be a trivial destructor
+//     if (is_trivially_destructible_v<Types> && ...) is true.
+
 #include <tuple>
 #include <string>
 #include <cassert>
diff --git a/test/std/utilities/utility/pairs/pairs.pair/dtor.pass.cpp b/test/std/utilities/utility/pairs/pairs.pair/dtor.pass.cpp
index 2d87e7ababab..83c55e75b4d2 100644
--- a/test/std/utilities/utility/pairs/pairs.pair/dtor.pass.cpp
+++ b/test/std/utilities/utility/pairs/pairs.pair/dtor.pass.cpp
@@ -15,6 +15,10 @@
 
 // ~pair()
 
+// C++17 added:
+//   The destructor of pair shall be a trivial destructor
+//     if (is_trivially_destructible_v<T1> && is_trivially_destructible_v<T2>) is true.
+
 
 #include <utility>
 #include <type_traits>
diff --git a/test/std/utilities/variant/variant.visit/visit.pass.cpp b/test/std/utilities/variant/variant.visit/visit.pass.cpp
index 05b58c16f3d8..316f2d22b01d 100644
--- a/test/std/utilities/variant/variant.visit/visit.pass.cpp
+++ b/test/std/utilities/variant/variant.visit/visit.pass.cpp
@@ -94,6 +94,16 @@ void test_call_operator_forwarding() {
   using Fn = ForwardingCallObject;
   Fn obj{};
   const Fn &cobj = obj;
+  { // test call operator forwarding - no variant
+    std::visit(obj);
+    assert(Fn::check_call<>(CT_NonConst | CT_LValue));
+    std::visit(cobj);
+    assert(Fn::check_call<>(CT_Const | CT_LValue));
+    std::visit(std::move(obj));
+    assert(Fn::check_call<>(CT_NonConst | CT_RValue));
+    std::visit(std::move(cobj));
+    assert(Fn::check_call<>(CT_Const | CT_RValue));
+  }
   { // test call operator forwarding - single variant, single arg
     using V = std::variant<int>;
     V v(42);
diff --git a/test/support/archetypes.hpp b/test/support/archetypes.hpp
index f442b592a26f..533f5869b5ab 100644
--- a/test/support/archetypes.hpp
+++ b/test/support/archetypes.hpp
@@ -5,6 +5,7 @@
 #include <cassert>
 
 #include "test_macros.h"
+#include "test_workarounds.h"
 
 #if TEST_STD_VER >= 11
 
@@ -14,7 +15,9 @@ template <bool, class T>
 struct DepType : T {};
 
 struct NullBase {
+#ifndef TEST_WORKAROUND_C1XX_BROKEN_ZA_CTOR_CHECK
 protected:
+#endif // !TEST_WORKAROUND_C1XX_BROKEN_ZA_CTOR_CHECK
   NullBase() = default;
   NullBase(NullBase const&) = default;
   NullBase& operator=(NullBase const&) = default;
@@ -81,7 +84,9 @@ struct TestBase {
       ++assigned; ++value_assigned;
       return *this;
     }
+#ifndef TEST_WORKAROUND_C1XX_BROKEN_ZA_CTOR_CHECK
 protected:
+#endif // !TEST_WORKAROUND_C1XX_BROKEN_ZA_CTOR_CHECK
     ~TestBase() {
       assert(value != -999); assert(alive > 0);
       --alive; ++destroyed; value = -999;
@@ -144,7 +149,9 @@ struct ValueBase {
     }
     //~ValueBase() { assert(value != -999); value = -999; }
     int value;
+#ifndef TEST_WORKAROUND_C1XX_BROKEN_ZA_CTOR_CHECK
 protected:
+#endif // !TEST_WORKAROUND_C1XX_BROKEN_ZA_CTOR_CHECK
     constexpr static int check_value(int const& val) {
 #if TEST_STD_VER < 14
       return val == -1 || val == 999 ? (TEST_THROW(42), 0) : val;
@@ -197,7 +204,9 @@ struct TrivialValueBase {
     template <bool Dummy = true, typename std::enable_if<Dummy && !Explicit, bool>::type = true>
     constexpr TrivialValueBase(std::initializer_list<int>& il, int = 0) : value(static_cast<int>(il.size())) {}
     int value;
+#ifndef TEST_WORKAROUND_C1XX_BROKEN_ZA_CTOR_CHECK
 protected:
+#endif // !TEST_WORKAROUND_C1XX_BROKEN_ZA_CTOR_CHECK
     constexpr TrivialValueBase() noexcept : value(0) {}
 };
 
diff --git a/test/support/archetypes.ipp b/test/support/archetypes.ipp
index d8d1e5a9330b..36045017907f 100644
--- a/test/support/archetypes.ipp
+++ b/test/support/archetypes.ipp
@@ -6,7 +6,11 @@
 #define DEFINE_EXPLICIT
 #endif
 #ifndef DEFINE_CONSTEXPR
+#ifdef TEST_WORKAROUND_EDG_EXPLICIT_CONSTEXPR
+#define DEFINE_CONSTEXPR
+#else // TEST_WORKAROUND_EDG_EXPLICIT_CONSTEXPR
 #define DEFINE_CONSTEXPR constexpr
+#endif // TEST_WORKAROUND_EDG_EXPLICIT_CONSTEXPR
 #endif
 #ifndef DEFINE_ASSIGN_CONSTEXPR
 #if TEST_STD_VER >= 14
diff --git a/test/support/filesystem_dynamic_test_helper.py b/test/support/filesystem_dynamic_test_helper.py
index d2b2810d0992..081e678b6e84 100644
--- a/test/support/filesystem_dynamic_test_helper.py
+++ b/test/support/filesystem_dynamic_test_helper.py
@@ -25,7 +25,7 @@ def sanitize(p):
 
 """
 Some of the tests restrict permissions to induce failures.
-Before we delete the test enviroment, we have to walk it and re-raise the
+Before we delete the test environment, we have to walk it and re-raise the
 permissions.
 """
 def clean_recursive(root_p):
diff --git a/test/support/filesystem_test_helper.hpp b/test/support/filesystem_test_helper.hpp
index 6cc0e370a786..755be90351ff 100644
--- a/test/support/filesystem_test_helper.hpp
+++ b/test/support/filesystem_test_helper.hpp
@@ -227,7 +227,7 @@ struct scoped_test_env
     }
 
     static inline void fs_helper_run(std::string const& raw_cmd) {
-        // check that the fs test root in the enviroment matches what we were
+        // check that the fs test root in the environment matches what we were
         // compiled with.
         static bool checked = checkDynamicTestRoot();
         ((void)checked);
@@ -246,7 +246,7 @@ struct scoped_test_env
             std::abort();
         }
         if (std::string(fs_root) != LIBCXX_FILESYSTEM_DYNAMIC_TEST_ROOT) {
-            std::printf("ERROR: LIBCXX_FILESYSTEM_DYNAMIC_TEST_ROOT enviroment variable"
+            std::printf("ERROR: LIBCXX_FILESYSTEM_DYNAMIC_TEST_ROOT environment variable"
                         " must have the same value as when the test was compiled.\n");
             std::printf("   Current Value:  '%s'\n", fs_root);
             std::printf("   Expected Value: '%s'\n", LIBCXX_FILESYSTEM_DYNAMIC_TEST_ROOT);
diff --git a/test/support/msvc_stdlib_force_include.hpp b/test/support/msvc_stdlib_force_include.hpp
index 2b78e507a327..6bcc97952199 100644
--- a/test/support/msvc_stdlib_force_include.hpp
+++ b/test/support/msvc_stdlib_force_include.hpp
@@ -13,11 +13,13 @@
 // This header is force-included when running the libc++ tests against the
 // MSVC standard library.
 
-// Silence warnings about CRT machinery.
-#define _CRT_SECURE_NO_WARNINGS
+#ifndef _LIBCXX_IN_DEVCRT
+    // Silence warnings about CRT machinery.
+    #define _CRT_SECURE_NO_WARNINGS
 
-// Avoid assertion dialogs.
-#define _CRT_SECURE_INVALID_PARAMETER(EXPR) ::abort()
+    // Avoid assertion dialogs.
+    #define _CRT_SECURE_INVALID_PARAMETER(EXPR) ::abort()
+#endif // _LIBCXX_IN_DEVCRT
 
 #include <crtdbg.h>
 #include <stdlib.h>
@@ -31,6 +33,7 @@
     #define _MSVC_STL_VER 42
 #endif
 
+#ifndef _LIBCXX_IN_DEVCRT
 struct AssertionDialogAvoider {
     AssertionDialogAvoider() {
         _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE);
@@ -42,6 +45,7 @@ struct AssertionDialogAvoider {
 };
 
 const AssertionDialogAvoider assertion_dialog_avoider{};
+#endif // _LIBCXX_IN_DEVCRT
 
 // MSVC frontend only configurations
 #if !defined(__clang__)
@@ -66,16 +70,18 @@ const AssertionDialogAvoider assertion_dialog_avoider{};
 // MSVC has quick_exit() and at_quick_exit().
 #define _LIBCPP_HAS_QUICK_EXIT
 
-// atomic_is_lock_free.pass.cpp needs this VS 2015 Update 2 fix.
-#define _ENABLE_ATOMIC_ALIGNMENT_FIX
+#ifndef _LIBCXX_IN_DEVCRT
+    // atomic_is_lock_free.pass.cpp needs this VS 2015 Update 2 fix.
+    #define _ENABLE_ATOMIC_ALIGNMENT_FIX
 
-// Enable features that /std:c++latest removes by default.
-#define _HAS_AUTO_PTR_ETC          1
-#define _HAS_FUNCTION_ASSIGN       1
-#define _HAS_OLD_IOSTREAMS_MEMBERS 1
+    // Enable features that /std:c++latest removes by default.
+    #define _HAS_AUTO_PTR_ETC          1
+    #define _HAS_FUNCTION_ASSIGN       1
+    #define _HAS_OLD_IOSTREAMS_MEMBERS 1
 
-// Silence warnings about raw pointers and other unchecked iterators.
-#define _SCL_SECURE_NO_WARNINGS
+    // Silence warnings about raw pointers and other unchecked iterators.
+    #define _SCL_SECURE_NO_WARNINGS
+#endif // _LIBCXX_IN_DEVCRT
 
 #include <ciso646>
 
diff --git a/test/support/test.workarounds/c1xx_broken_za_ctor_check.pass.cpp b/test/support/test.workarounds/c1xx_broken_za_ctor_check.pass.cpp
new file mode 100644
index 000000000000..856574dbcf23
--- /dev/null
+++ b/test/support/test.workarounds/c1xx_broken_za_ctor_check.pass.cpp
@@ -0,0 +1,41 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++98, c++03
+
+// Verify TEST_WORKAROUND_C1XX_BROKEN_ZA_CTOR_CHECK.
+
+#include <type_traits>
+
+#include "test_workarounds.h"
+
+struct X {
+    X(int) {}
+
+    X(X&&) = default;
+    X& operator=(X&&) = default;
+
+private:
+    X(const X&) = default;
+    X& operator=(const X&) = default;
+};
+
+void PushFront(X&&) {}
+
+template<class T = int>
+auto test(int) -> decltype(PushFront(std::declval<T>()), std::true_type{});
+auto test(long) -> std::false_type;
+
+int main() {
+#if defined(TEST_WORKAROUND_C1XX_BROKEN_ZA_CTOR_CHECK)
+    static_assert(!decltype(test(0))::value, "");
+#else
+    static_assert(decltype(test(0))::value, "");
+#endif
+}
diff --git a/test/support/test_macros.h b/test/support/test_macros.h
index 46fcb66fecfc..6b573d40fe68 100644
--- a/test/support/test_macros.h
+++ b/test/support/test_macros.h
@@ -52,10 +52,12 @@
 #define TEST_HAS_BUILTIN_IDENTIFIER(X) 0
 #endif
 
-#if defined(__clang__)
-#define TEST_COMPILER_CLANG
+#if defined(__EDG__)
+# define TEST_COMPILER_EDG
+#elif defined(__clang__)
+# define TEST_COMPILER_CLANG
 # if defined(__apple_build_version__)
-#   define TEST_COMPILER_APPLE_CLANG
+#  define TEST_COMPILER_APPLE_CLANG
 # endif
 #elif defined(_MSC_VER)
 # define TEST_COMPILER_C1XX
diff --git a/test/support/test_workarounds.h b/test/support/test_workarounds.h
index b24c883902e8..614cec7aab86 100644
--- a/test/support/test_workarounds.h
+++ b/test/support/test_workarounds.h
@@ -13,9 +13,16 @@
 
 #include "test_macros.h"
 
+#if defined(TEST_COMPILER_EDG)
+# define TEST_WORKAROUND_EDG_EXPLICIT_CONSTEXPR
+#endif
+
 #if defined(TEST_COMPILER_C1XX)
 # define TEST_WORKAROUND_C1XX_BROKEN_NULLPTR_CONVERSION_OPERATOR
 # define TEST_WORKAROUND_C1XX_BROKEN_IS_TRIVIALLY_COPYABLE
+# ifndef _MSC_EXTENSIONS
+#  define TEST_WORKAROUND_C1XX_BROKEN_ZA_CTOR_CHECK
+# endif
 #endif
 
 #endif // SUPPORT_TEST_WORKAROUNDS_H
diff --git a/utils/libcxx/test/config.py b/utils/libcxx/test/config.py
index 7f1ae851b9ce..25553c70da31 100644
--- a/utils/libcxx/test/config.py
+++ b/utils/libcxx/test/config.py
@@ -67,7 +67,7 @@ def __init__(self, lit_config, config):
         self.abi_library_root = None
         self.link_shared = self.get_lit_bool('enable_shared', default=True)
         self.debug_build = self.get_lit_bool('debug_build',   default=False)
-        self.exec_env = {}
+        self.exec_env = dict(os.environ)
         self.use_target = False
         self.use_system_cxx_lib = False
         self.use_clang_verify = False
@@ -160,7 +160,11 @@ def print_config_info(self):
         # Print as list to prevent "set([...])" from being printed.
         self.lit_config.note('Using available_features: %s' %
                              list(self.config.available_features))
-        self.lit_config.note('Using environment: %r' % self.exec_env)
+        show_env_vars = {}
+        for k,v in self.exec_env.items():
+            if k not in os.environ or os.environ[k] != v:
+                show_env_vars[k] = v
+        self.lit_config.note('Adding environment variables: %r' % show_env_vars)
         sys.stderr.flush()  # Force flushing to avoid broken output on Windows
 
     def get_test_format(self):
@@ -546,6 +550,7 @@ def configure_default_compile_flags(self):
 
     def configure_compile_flags_header_includes(self):
         support_path = os.path.join(self.libcxx_src_root, 'test', 'support')
+        self.configure_config_site_header()
         if self.cxx_stdlib_under_test != 'libstdc++' and \
            not self.is_windows:
             self.cxx.compile_flags += [
@@ -561,7 +566,6 @@ def configure_compile_flags_header_includes(self):
                 '-include', os.path.join(support_path,
                                          'set_windows_crt_report_mode.h')
             ]
-        self.configure_config_site_header()
         cxx_headers = self.get_lit_conf('cxx_headers')
         if cxx_headers == '' or (cxx_headers is None
                                  and self.cxx_stdlib_under_test != 'libc++'):
@@ -868,6 +872,9 @@ def configure_warnings(self):
         # FIXME: Enable the two warnings below.
         self.cxx.addWarningFlagIfSupported('-Wno-conversion')
         self.cxx.addWarningFlagIfSupported('-Wno-unused-local-typedef')
+        # FIXME: Remove this flag once the min/max handling patch lands.
+        # See https://reviews.llvm.org/D33080
+        self.cxx.addWarningFlagIfSupported('-Wno-#warnings')
         std = self.get_lit_conf('std', None)
         if std in ['c++98', 'c++03']:
             # The '#define static_assert' provided by libc++ in C++03 mode
@@ -1002,18 +1009,8 @@ def configure_substitutions(self):
         sub.append(('%link', link_str))
         sub.append(('%build', build_str))
         # Configure exec prefix substitutions.
-        exec_env_str = ''
-        if not self.is_windows and len(self.exec_env) != 0:
-            exec_env_str = 'env '
-            for k, v in self.exec_env.items():
-                exec_env_str += ' %s=%s' % (k, v)
         # Configure run env substitution.
-        exec_str = exec_env_str
-        if self.lit_config.useValgrind:
-            exec_str = ' '.join(self.lit_config.valgrindArgs) + exec_env_str
-        sub.append(('%exec', exec_str))
-        # Configure run shortcut
-        sub.append(('%run', exec_str + ' %t.exe'))
+        sub.append(('%run', '%t.exe'))
         # Configure not program substitutions
         not_py = os.path.join(self.libcxx_src_root, 'utils', 'not.py')
         not_str = '%s %s ' % (pipes.quote(sys.executable), pipes.quote(not_py))
diff --git a/utils/libcxx/test/executor.py b/utils/libcxx/test/executor.py
index 4a189174d915..0ccf96caa8ba 100644
--- a/utils/libcxx/test/executor.py
+++ b/utils/libcxx/test/executor.py
@@ -38,36 +38,11 @@ def __init__(self):
 
     def run(self, exe_path, cmd=None, work_dir='.', file_deps=None, env=None):
         cmd = cmd or [exe_path]
-        env_cmd = []
-        if env:
-            env_cmd += ['env']
-            env_cmd += ['%s=%s' % (k, v) for k, v in env.items()]
         if work_dir == '.':
             work_dir = os.getcwd()
-        if not self.is_windows:
-            out, err, rc = executeCommand(env_cmd + cmd, cwd=work_dir)
-        else:
-            out, err, rc = executeCommand(cmd, cwd=work_dir,
-                                          env=self._build_windows_env(env))
-        return (env_cmd + cmd, out, err, rc)
+        out, err, rc = executeCommand(cmd, cwd=work_dir, env=env)
+        return (cmd, out, err, rc)
 
-    def _build_windows_env(self, exec_env):
-        # FIXME: Finding Windows DLL's at runtime requires modifying the
-        #   PATH environment variables. However we don't want to print out
-        #   the entire PATH as part of the diagnostic for every failing test.
-        #   Therefore this hack builds a new executable environment that
-        #   merges the current environment and the supplied environment while
-        #   still only printing the supplied environment in diagnostics.
-        if not self.is_windows or exec_env is None:
-            return None
-        new_env = dict(os.environ)
-        for key, value in exec_env.items():
-            if key == 'PATH':
-                assert value.strip() != '' and "expected non-empty path"
-                new_env['PATH'] = "%s;%s" % (value, os.environ['PATH'])
-            else:
-                new_env[key] = value
-        return new_env
 
 class PrefixExecutor(Executor):
     """Prefix an executor with some other command wrapper.
diff --git a/utils/libcxx/test/format.py b/utils/libcxx/test/format.py
index c3bc97187ad4..3ac5472b4720 100644
--- a/utils/libcxx/test/format.py
+++ b/utils/libcxx/test/format.py
@@ -139,7 +139,7 @@ def _execute(self, test, lit_config):
                 # We can't run ShTest tests with a executor yet.
                 # For now, bail on trying to run them
                 return lit.Test.UNSUPPORTED, 'ShTest format not yet supported'
-            test.config.enviroment = dict(self.exec_env)
+            test.config.environment = dict(self.exec_env)
             return lit.TestRunner._runShTest(test, lit_config,
                                              self.execute_external, script,
                                              tmpBase)
diff --git a/www/cxx1z_status.html b/www/cxx1z_status.html
index 867a6c41f9ad..101ad43212e5 100644
--- a/www/cxx1z_status.html
+++ b/www/cxx1z_status.html
@@ -387,7 +387,7 @@
 	<tr><td><a href="http://wg21.link/LWG2584">2584</a></td><td><regex> ECMAScript IdentityEscape is ambiguous</td><td>Issaquah</td><td></td></tr>
 	<tr><td><a href="http://wg21.link/LWG2588">2588</a></td><td>[fund.ts.v2] "Convertible to bool" requirement in conjunction and disjunction</td><td>Issaquah</td><td></td></tr>
 	<tr><td><a href="http://wg21.link/LWG2589">2589</a></td><td>match_results can't satisfy the requirements of a container</td><td>Issaquah</td><td>Complete</td></tr>
-	<tr><td><a href="http://wg21.link/LWG2591">2591</a></td><td>std::function's member template target() should not lead to undefined behaviour</td><td>Issaquah</td><td></td></tr>
+	<tr><td><a href="http://wg21.link/LWG2591">2591</a></td><td>std::function's member template target() should not lead to undefined behaviour</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2598">2598</a></td><td>addressof works on temporaries</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2664">2664</a></td><td>operator/ (and other append) semantics not useful if argument has root</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2665">2665</a></td><td>remove_filename() post condition is incorrect</td><td>Issaquah</td><td>Complete</td></tr>
@@ -437,7 +437,7 @@
 	<tr><td><a href="http://wg21.link/LWG2768">2768</a></td><td>any_cast and move semantics</td><td>Kona</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2769">2769</a></td><td>Redundant const in the return type of any_cast(const any&amp;)</td><td>Kona</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2781">2781</a></td><td>Contradictory requirements for std::function and std::reference_wrapper</td><td>Kona</td><td>Complete</td></tr>
-	<tr><td><a href="http://wg21.link/LWG2782">2782</a></td><td>scoped_allocator_adaptor constructors must be constrained</td><td>Kona</td><td></td></tr>
+	<tr><td><a href="http://wg21.link/LWG2782">2782</a></td><td>scoped_allocator_adaptor constructors must be constrained</td><td>Kona</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2784">2784</a></td><td>Resolution to LWG 2484 is missing "otherwise, no effects" and is hard to parse</td><td>Kona</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2785">2785</a></td><td>quoted should work with basic_string_view</td><td>Kona</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2786">2786</a></td><td>Annex C should mention shared_ptr changes for array support</td><td>Kona</td><td>Complete</td></tr>
@@ -447,7 +447,7 @@
 	<tr><td><a href="http://wg21.link/LWG2790">2790</a></td><td>Missing specification of istreambuf_iterator::operator-&gt;</td><td>Kona</td><td></td></tr>
 	<tr><td><a href="http://wg21.link/LWG2794">2794</a></td><td>Missing requirements for allocator pointers</td><td>Kona</td><td></td></tr>
 	<tr><td><a href="http://wg21.link/LWG2795">2795</a></td><td>&sect;[global.functions] provides incorrect example of ADL use</td><td>Kona</td><td>Complete</td></tr>
-	<tr><td><a href="http://wg21.link/LWG2796">2796</a></td><td>tuple should be a literal type</td><td>Kona</td><td></td></tr>
+	<tr><td><a href="http://wg21.link/LWG2796">2796</a></td><td>tuple should be a literal type</td><td>Kona</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2801">2801</a></td><td>Default-constructibility of unique_ptr</td><td>Kona</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2802">2802</a></td><td>shared_ptr constructor requirements for a deleter</td><td>Kona</td><td></td></tr>
 	<tr><td><a href="http://wg21.link/LWG2804">2804</a></td><td>Unconditional constexpr default constructor for istream_iterator</td><td>Kona</td><td>Complete</td></tr>
@@ -461,7 +461,7 @@
 	<tr><td><a href="http://wg21.link/LWG2837">2837</a></td><td>gcd and lcm should support a wider range of input values</td><td>Kona</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2838">2838</a></td><td>is_literal_type specification needs a little cleanup</td><td>Kona</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2842">2842</a></td><td>in_place_t check for optional::optional(U&amp;&amp;) should decay U</td><td>Kona</td><td>Complete</td></tr>
-	<tr><td><a href="http://wg21.link/LWG2850">2850</a></td><td>std::function move constructor does unnecessary work</td><td>Kona</td><td></td></tr>
+	<tr><td><a href="http://wg21.link/LWG2850">2850</a></td><td>std::function move constructor does unnecessary work</td><td>Kona</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2853">2853</a></td><td>Possible inconsistency in specification of erase in [vector.modifiers]</td><td>Kona</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2855">2855</a></td><td>std::throw_with_nested("string_literal")</td><td>Kona</td><td>Complete</td></tr>
 	<tr><td><a href="http://wg21.link/LWG2857">2857</a></td><td>{variant,optional,any}::emplace should return the constructed value</td><td>Kona</td><td>Complete</td></tr>
@@ -489,7 +489,7 @@
 <!-- 	<tr><td></td><td></td><td></td><td></td></tr> -->
   </table>
 
-  <p>Last Updated: 17-Apr-2017</p>
+  <p>Last Updated: 11-May-2017</p>
 </div>
 </body>
 </html>

From 022ebf5bbf58ca2dd943d3376cc95a6b206db799 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Tue, 16 May 2017 19:47:41 +0000
Subject: [PATCH 3/9] Vendor import of lld trunk r303197:
 https://llvm.org/svn/llvm-project/lld/trunk@303197

---
 COFF/Driver.cpp                             |   2 +-
 COFF/ICF.cpp                                |   4 +-
 COFF/MapFile.cpp                            |   4 +-
 COFF/Writer.cpp                             |  16 +-
 ELF/Config.h                                |   1 -
 ELF/Driver.cpp                              |   8 +-
 ELF/GdbIndex.h                              |   2 +-
 ELF/ICF.cpp                                 |   2 +-
 ELF/InputFiles.cpp                          |   6 +-
 ELF/InputSection.cpp                        |  56 ++-
 ELF/LinkerScript.cpp                        |  84 ++--
 ELF/LinkerScript.h                          |  11 +-
 ELF/MapFile.cpp                             |   2 +-
 ELF/OutputSections.cpp                      |  13 +-
 ELF/OutputSections.h                        |   1 +
 ELF/Relocations.cpp                         |  80 ++--
 ELF/ScriptParser.cpp                        |  24 +-
 ELF/Strings.cpp                             |   4 +-
 ELF/Symbols.cpp                             |   9 +-
 ELF/Symbols.h                               |   2 +-
 ELF/SyntheticSections.cpp                   | 217 ++++-----
 ELF/SyntheticSections.h                     |  51 ++-
 ELF/Target.cpp                              |  60 ++-
 ELF/Threads.h                               |  21 +-
 ELF/Thunks.cpp                              |  18 +-
 ELF/Writer.cpp                              | 470 ++++++++++----------
 ELF/Writer.h                                |   2 +-
 docs/CMakeLists.txt                         |   2 +-
 include/lld/Core/Parallel.h                 | 166 -------
 include/lld/Core/TaskGroup.h                |  65 ---
 lib/Core/CMakeLists.txt                     |   1 -
 lib/Core/TaskGroup.cpp                      | 141 ------
 lib/ReaderWriter/MachO/LayoutPass.cpp       |  10 +-
 test/COFF/constant-export.test              |  85 +++-
 test/COFF/constant-export.yaml              |  83 ----
 test/ELF/Inputs/i386-static-tls-model1.s    |  10 -
 test/ELF/Inputs/i386-static-tls-model2.s    |   9 -
 test/ELF/Inputs/i386-static-tls-model3.s    |   9 -
 test/ELF/Inputs/i386-static-tls-model4.s    |   9 -
 test/ELF/gdb-index-empty.s                  | 116 +++++
 test/ELF/gdb-index-gc-sections.s            | 157 +++++++
 test/ELF/i386-static-tls-model.s            |  20 -
 test/ELF/i386-tls-ie-shared.s               |  52 +--
 test/ELF/incompatible-section-types2.s      |   6 +-
 test/ELF/linkerscript/early-assign-symbol.s |  14 +
 test/ELF/linkerscript/ehdr_start.s          |  14 +-
 test/ELF/linkerscript/sections-constraint.s |   2 +-
 test/ELF/linkerscript/sections.s            |   5 +-
 test/ELF/linkerscript/symbol-memoryexpr.s   |  33 ++
 test/ELF/many-alloc-sections.s              | 106 +++++
 test/ELF/many-sections.s                    |   9 +-
 test/ELF/tls-dynamic-i686.s                 |  46 +-
 test/ELF/tls-opt-iele-i686-nopic.s          |  44 +-
 test/ELF/x86-64-reloc-tpoff32-fpic.s        |  14 +
 unittests/CMakeLists.txt                    |   1 -
 unittests/CoreTests/CMakeLists.txt          |   7 -
 unittests/CoreTests/ParallelTest.cpp        |  46 --
 57 files changed, 1213 insertions(+), 1239 deletions(-)
 delete mode 100644 include/lld/Core/Parallel.h
 delete mode 100644 include/lld/Core/TaskGroup.h
 delete mode 100644 lib/Core/TaskGroup.cpp
 delete mode 100644 test/COFF/constant-export.yaml
 delete mode 100644 test/ELF/Inputs/i386-static-tls-model1.s
 delete mode 100644 test/ELF/Inputs/i386-static-tls-model2.s
 delete mode 100644 test/ELF/Inputs/i386-static-tls-model3.s
 delete mode 100644 test/ELF/Inputs/i386-static-tls-model4.s
 create mode 100644 test/ELF/gdb-index-empty.s
 create mode 100644 test/ELF/gdb-index-gc-sections.s
 delete mode 100644 test/ELF/i386-static-tls-model.s
 create mode 100644 test/ELF/linkerscript/early-assign-symbol.s
 create mode 100644 test/ELF/linkerscript/symbol-memoryexpr.s
 create mode 100644 test/ELF/many-alloc-sections.s
 create mode 100644 test/ELF/x86-64-reloc-tpoff32-fpic.s
 delete mode 100644 unittests/CoreTests/CMakeLists.txt
 delete mode 100644 unittests/CoreTests/ParallelTest.cpp

diff --git a/COFF/Driver.cpp b/COFF/Driver.cpp
index 5a15b5b11507..4c0ea44b875e 100644
--- a/COFF/Driver.cpp
+++ b/COFF/Driver.cpp
@@ -18,7 +18,6 @@
 #include "lld/Driver/Driver.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/LibDriver/LibDriver.h"
 #include "llvm/Object/ArchiveWriter.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
@@ -29,6 +28,7 @@
 #include "llvm/Support/TarWriter.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ToolDrivers/llvm-lib/LibDriver.h"
 #include <algorithm>
 #include <memory>
 
diff --git a/COFF/ICF.cpp b/COFF/ICF.cpp
index 9a43f2bd43f5..3b7cc424f0a2 100644
--- a/COFF/ICF.cpp
+++ b/COFF/ICF.cpp
@@ -21,9 +21,9 @@
 #include "Chunks.h"
 #include "Error.h"
 #include "Symbols.h"
-#include "lld/Core/Parallel.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <atomic>
@@ -192,7 +192,7 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
   // Split sections into 256 shards and call Fn in parallel.
   size_t NumShards = 256;
   size_t Step = Chunks.size() / NumShards;
-  parallel_for(size_t(0), NumShards, [&](size_t I) {
+  for_each_n(parallel::par, size_t(0), NumShards, [&](size_t I) {
     forEachClassRange(I * Step, (I + 1) * Step, Fn);
   });
   forEachClassRange(Step * NumShards, Chunks.size(), Fn);
diff --git a/COFF/MapFile.cpp b/COFF/MapFile.cpp
index 4e596e602fee..b63d4672c7d5 100644
--- a/COFF/MapFile.cpp
+++ b/COFF/MapFile.cpp
@@ -25,7 +25,7 @@
 #include "Symbols.h"
 #include "Writer.h"
 
-#include "lld/Core/Parallel.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -76,7 +76,7 @@ static SymbolMapTy getSectionSyms(ArrayRef<DefinedRegular *> Syms) {
 static DenseMap<DefinedRegular *, std::string>
 getSymbolStrings(ArrayRef<DefinedRegular *> Syms) {
   std::vector<std::string> Str(Syms.size());
-  parallel_for((size_t)0, Syms.size(), [&](size_t I) {
+  for_each_n(parallel::par, (size_t)0, Syms.size(), [&](size_t I) {
     raw_string_ostream OS(Str[I]);
     writeHeader(OS, Syms[I]->getRVA(), 0, 0);
     OS << indent(2) << toString(*Syms[I]);
diff --git a/COFF/Writer.cpp b/COFF/Writer.cpp
index 8762b88c4d6b..5c9c8375dadc 100644
--- a/COFF/Writer.cpp
+++ b/COFF/Writer.cpp
@@ -17,13 +17,13 @@
 #include "PDB.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
-#include "lld/Core/Parallel.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/RandomNumberGenerator.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -745,8 +745,8 @@ void Writer::writeSections() {
     // ADD instructions).
     if (Sec->getPermissions() & IMAGE_SCN_CNT_CODE)
       memset(SecBuf, 0xCC, Sec->getRawSize());
-    parallel_for_each(Sec->getChunks().begin(), Sec->getChunks().end(),
-                      [&](Chunk *C) { C->writeTo(SecBuf); });
+    for_each(parallel::par, Sec->getChunks().begin(), Sec->getChunks().end(),
+             [&](Chunk *C) { C->writeTo(SecBuf); });
   }
 }
 
@@ -760,16 +760,14 @@ void Writer::sortExceptionTable() {
   uint8_t *End = Begin + Sec->getVirtualSize();
   if (Config->Machine == AMD64) {
     struct Entry { ulittle32_t Begin, End, Unwind; };
-    parallel_sort(
-        (Entry *)Begin, (Entry *)End,
-        [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
+    sort(parallel::par, (Entry *)Begin, (Entry *)End,
+         [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
     return;
   }
   if (Config->Machine == ARMNT) {
     struct Entry { ulittle32_t Begin, Unwind; };
-    parallel_sort(
-        (Entry *)Begin, (Entry *)End,
-        [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
+    sort(parallel::par, (Entry *)Begin, (Entry *)End,
+         [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
     return;
   }
   errs() << "warning: don't know how to handle .pdata.\n";
diff --git a/ELF/Config.h b/ELF/Config.h
index 0321c84e7106..57a0e5a5ec73 100644
--- a/ELF/Config.h
+++ b/ELF/Config.h
@@ -73,7 +73,6 @@ struct VersionDefinition {
 // Most fields are initialized by the driver.
 struct Configuration {
   InputFile *FirstElf = nullptr;
-  bool HasStaticTlsModel = false;
   uint8_t OSABI = 0;
   llvm::CachePruningPolicy ThinLTOCachePolicy;
   llvm::StringMap<uint64_t> SectionStartMap;
diff --git a/ELF/Driver.cpp b/ELF/Driver.cpp
index c2cfe3c4129e..737c6a6bf114 100644
--- a/ELF/Driver.cpp
+++ b/ELF/Driver.cpp
@@ -284,7 +284,7 @@ static int getInteger(opt::InputArgList &Args, unsigned Key, int Default) {
   int V = Default;
   if (auto *Arg = Args.getLastArg(Key)) {
     StringRef S = Arg->getValue();
-    if (S.getAsInteger(10, V))
+    if (!to_integer(S, V, 10))
       error(Arg->getSpelling() + ": number expected, but got " + S);
   }
   return V;
@@ -311,7 +311,7 @@ static uint64_t getZOptionValue(opt::InputArgList &Args, StringRef Key,
     if (Pos != StringRef::npos && Key == Value.substr(0, Pos)) {
       Value = Value.substr(Pos + 1);
       uint64_t Result;
-      if (Value.getAsInteger(0, Result))
+      if (!to_integer(Value, Result))
         error("invalid " + Key + ": " + Value);
       return Result;
     }
@@ -522,7 +522,7 @@ static uint64_t parseSectionAddress(StringRef S, opt::Arg *Arg) {
   uint64_t VA = 0;
   if (S.startswith("0x"))
     S = S.drop_front(2);
-  if (S.getAsInteger(16, VA))
+  if (!to_integer(S, VA, 16))
     error("invalid argument: " + toString(Arg));
   return VA;
 }
@@ -886,7 +886,7 @@ static uint64_t getImageBase(opt::InputArgList &Args) {
 
   StringRef S = Arg->getValue();
   uint64_t V;
-  if (S.getAsInteger(0, V)) {
+  if (!to_integer(S, V)) {
     error("-image-base: number expected, but got " + S);
     return 0;
   }
diff --git a/ELF/GdbIndex.h b/ELF/GdbIndex.h
index a36b92714def..03fec64f9bd5 100644
--- a/ELF/GdbIndex.h
+++ b/ELF/GdbIndex.h
@@ -21,7 +21,7 @@ class InputSection;
 
 // Struct represents single entry of address area of gdb index.
 struct AddressEntry {
-  InputSectionBase *Section;
+  InputSection *Section;
   uint64_t LowAddress;
   uint64_t HighAddress;
   size_t CuIndex;
diff --git a/ELF/ICF.cpp b/ELF/ICF.cpp
index dcf01ea80011..3722d4e3ed2f 100644
--- a/ELF/ICF.cpp
+++ b/ELF/ICF.cpp
@@ -325,7 +325,7 @@ void ICF<ELFT>::forEachClass(std::function<void(size_t, size_t)> Fn) {
   // Split sections into 256 shards and call Fn in parallel.
   size_t NumShards = 256;
   size_t Step = Sections.size() / NumShards;
-  parallelFor(0, NumShards, [&](size_t I) {
+  parallelForEachN(0, NumShards, [&](size_t I) {
     forEachClassRange(I * Step, (I + 1) * Step, Fn);
   });
   forEachClassRange(Step * NumShards, Sections.size(), Fn);
diff --git a/ELF/InputFiles.cpp b/ELF/InputFiles.cpp
index 5f94fc9338a4..fe036a644f41 100644
--- a/ELF/InputFiles.cpp
+++ b/ELF/InputFiles.cpp
@@ -383,9 +383,9 @@ elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec,
     // we see. The eglibc ARM dynamic loaders require the presence of an
     // attribute section for dlopen to work.
     // In a full implementation we would merge all attribute sections.
-    if (In<ELFT>::ARMAttributes == nullptr) {
-      In<ELFT>::ARMAttributes = make<InputSection>(this, &Sec, Name);
-      return In<ELFT>::ARMAttributes;
+    if (InX::ARMAttributes == nullptr) {
+      InX::ARMAttributes = make<InputSection>(this, &Sec, Name);
+      return InX::ARMAttributes;
     }
     return &InputSection::Discarded;
   case SHT_RELA:
diff --git a/ELF/InputSection.cpp b/ELF/InputSection.cpp
index c082f128a9bc..87896ec96b29 100644
--- a/ELF/InputSection.cpp
+++ b/ELF/InputSection.cpp
@@ -324,7 +324,7 @@ void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
     // section, but for --emit-relocs it is an virtual address.
     P->r_offset = RelocatedSection->OutSec->Addr +
                   RelocatedSection->getOffset(Rel.r_offset);
-    P->setSymbolAndType(In<ELFT>::SymTab->getSymbolIndex(&Body), Type,
+    P->setSymbolAndType(InX::SymTab->getSymbolIndex(&Body), Type,
                         Config->IsMips64EL);
 
     if (Body.Type == STT_SECTION) {
@@ -400,40 +400,40 @@ getRelocTargetVA(uint32_t Type, int64_t A, typename ELFT::uint P,
     return Body.getVA(A);
   case R_GOT:
   case R_RELAX_TLS_GD_TO_IE_ABS:
-    return Body.getGotVA<ELFT>() + A;
+    return Body.getGotVA() + A;
   case R_GOTONLY_PC:
-    return In<ELFT>::Got->getVA() + A - P;
+    return InX::Got->getVA() + A - P;
   case R_GOTONLY_PC_FROM_END:
-    return In<ELFT>::Got->getVA() + A - P + In<ELFT>::Got->getSize();
+    return InX::Got->getVA() + A - P + InX::Got->getSize();
   case R_GOTREL:
-    return Body.getVA(A) - In<ELFT>::Got->getVA();
+    return Body.getVA(A) - InX::Got->getVA();
   case R_GOTREL_FROM_END:
-    return Body.getVA(A) - In<ELFT>::Got->getVA() - In<ELFT>::Got->getSize();
+    return Body.getVA(A) - InX::Got->getVA() - InX::Got->getSize();
   case R_GOT_FROM_END:
   case R_RELAX_TLS_GD_TO_IE_END:
-    return Body.getGotOffset() + A - In<ELFT>::Got->getSize();
+    return Body.getGotOffset() + A - InX::Got->getSize();
   case R_GOT_OFF:
     return Body.getGotOffset() + A;
   case R_GOT_PAGE_PC:
   case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
-    return getAArch64Page(Body.getGotVA<ELFT>() + A) - getAArch64Page(P);
+    return getAArch64Page(Body.getGotVA() + A) - getAArch64Page(P);
   case R_GOT_PC:
   case R_RELAX_TLS_GD_TO_IE:
-    return Body.getGotVA<ELFT>() + A - P;
+    return Body.getGotVA() + A - P;
   case R_HINT:
   case R_NONE:
   case R_TLSDESC_CALL:
     llvm_unreachable("cannot relocate hint relocs");
   case R_MIPS_GOTREL:
-    return Body.getVA(A) - In<ELFT>::MipsGot->getGp();
+    return Body.getVA(A) - InX::MipsGot->getGp();
   case R_MIPS_GOT_GP:
-    return In<ELFT>::MipsGot->getGp() + A;
+    return InX::MipsGot->getGp() + A;
   case R_MIPS_GOT_GP_PC: {
     // R_MIPS_LO16 expression has R_MIPS_GOT_GP_PC type iif the target
     // is _gp_disp symbol. In that case we should use the following
     // formula for calculation "AHL + GP - P + 4". For details see p. 4-19 at
     // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
-    uint64_t V = In<ELFT>::MipsGot->getGp() + A - P;
+    uint64_t V = InX::MipsGot->getGp() + A - P;
     if (Type == R_MIPS_LO16)
       V += 4;
     return V;
@@ -442,24 +442,21 @@ getRelocTargetVA(uint32_t Type, int64_t A, typename ELFT::uint P,
     // If relocation against MIPS local symbol requires GOT entry, this entry
     // should be initialized by 'page address'. This address is high 16-bits
     // of sum the symbol's value and the addend.
-    return In<ELFT>::MipsGot->getVA() +
-           In<ELFT>::MipsGot->getPageEntryOffset(Body, A) -
-           In<ELFT>::MipsGot->getGp();
+    return InX::MipsGot->getVA() + InX::MipsGot->getPageEntryOffset(Body, A) -
+           InX::MipsGot->getGp();
   case R_MIPS_GOT_OFF:
   case R_MIPS_GOT_OFF32:
     // In case of MIPS if a GOT relocation has non-zero addend this addend
     // should be applied to the GOT entry content not to the GOT entry offset.
     // That is why we use separate expression type.
-    return In<ELFT>::MipsGot->getVA() +
-           In<ELFT>::MipsGot->getBodyEntryOffset(Body, A) -
-           In<ELFT>::MipsGot->getGp();
+    return InX::MipsGot->getVA() + InX::MipsGot->getBodyEntryOffset(Body, A) -
+           InX::MipsGot->getGp();
   case R_MIPS_TLSGD:
-    return In<ELFT>::MipsGot->getVA() + In<ELFT>::MipsGot->getTlsOffset() +
-           In<ELFT>::MipsGot->getGlobalDynOffset(Body) -
-           In<ELFT>::MipsGot->getGp();
+    return InX::MipsGot->getVA() + InX::MipsGot->getTlsOffset() +
+           InX::MipsGot->getGlobalDynOffset(Body) - InX::MipsGot->getGp();
   case R_MIPS_TLSLD:
-    return In<ELFT>::MipsGot->getVA() + In<ELFT>::MipsGot->getTlsOffset() +
-           In<ELFT>::MipsGot->getTlsIndexOff() - In<ELFT>::MipsGot->getGp();
+    return InX::MipsGot->getVA() + InX::MipsGot->getTlsOffset() +
+           InX::MipsGot->getTlsIndexOff() - InX::MipsGot->getGp();
   case R_PAGE_PC:
   case R_PLT_PAGE_PC:
     if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak())
@@ -523,19 +520,18 @@ getRelocTargetVA(uint32_t Type, int64_t A, typename ELFT::uint P,
   case R_SIZE:
     return Body.getSize<ELFT>() + A;
   case R_TLSDESC:
-    return In<ELFT>::Got->getGlobalDynAddr(Body) + A;
+    return InX::Got->getGlobalDynAddr(Body) + A;
   case R_TLSDESC_PAGE:
-    return getAArch64Page(In<ELFT>::Got->getGlobalDynAddr(Body) + A) -
+    return getAArch64Page(InX::Got->getGlobalDynAddr(Body) + A) -
            getAArch64Page(P);
   case R_TLSGD:
-    return In<ELFT>::Got->getGlobalDynOffset(Body) + A -
-           In<ELFT>::Got->getSize();
+    return InX::Got->getGlobalDynOffset(Body) + A - InX::Got->getSize();
   case R_TLSGD_PC:
-    return In<ELFT>::Got->getGlobalDynAddr(Body) + A - P;
+    return InX::Got->getGlobalDynAddr(Body) + A - P;
   case R_TLSLD:
-    return In<ELFT>::Got->getTlsIndexOff() + A - In<ELFT>::Got->getSize();
+    return InX::Got->getTlsIndexOff() + A - InX::Got->getSize();
   case R_TLSLD_PC:
-    return In<ELFT>::Got->getTlsIndexVA() + A - P;
+    return InX::Got->getTlsIndexVA() + A - P;
   }
   llvm_unreachable("Invalid expression");
 }
diff --git a/ELF/LinkerScript.cpp b/ELF/LinkerScript.cpp
index d7858e173c7b..161909abf00d 100644
--- a/ELF/LinkerScript.cpp
+++ b/ELF/LinkerScript.cpp
@@ -48,8 +48,12 @@ using namespace lld::elf;
 LinkerScript *elf::Script;
 
 uint64_t ExprValue::getValue() const {
-  if (Sec)
-    return Sec->getOffset(Val) + Sec->getOutputSection()->Addr;
+  if (Sec) {
+    if (Sec->getOutputSection())
+      return Sec->getOffset(Val) + Sec->getOutputSection()->Addr;
+    error("unable to evaluate expression: input section " + Sec->Name +
+          " has no output section assigned");
+  }
   return Val;
 }
 
@@ -411,6 +415,7 @@ void LinkerScript::processCommands(OutputSectionFactory &Factory) {
       if (OutputSection *Sec = Cmd->Sec) {
         assert(Sec->SectionIndex == INT_MAX);
         Sec->SectionIndex = I;
+        SecToCommand[Sec] = Cmd;
       }
     }
   }
@@ -440,6 +445,7 @@ void LinkerScript::fabricateDefaultCommands() {
 
     auto *OSCmd = make<OutputSectionCommand>(Sec->Name);
     OSCmd->Sec = Sec;
+    SecToCommand[Sec] = OSCmd;
 
     // Prefer user supplied address over additional alignment constraint
     auto I = Config->SectionStartMap.find(Sec->Name);
@@ -484,6 +490,7 @@ void LinkerScript::addOrphanSections(OutputSectionFactory &Factory) {
       auto *Cmd = cast<OutputSectionCommand>(*I);
       Factory.addInputSec(S, Name, Cmd->Sec);
       if (OutputSection *Sec = Cmd->Sec) {
+        SecToCommand[Sec] = Cmd;
         unsigned Index = std::distance(Opt.Commands.begin(), I);
         assert(Sec->SectionIndex == INT_MAX || Sec->SectionIndex == Index);
         Sec->SectionIndex = Index;
@@ -699,6 +706,7 @@ void LinkerScript::adjustSectionsBeforeSorting() {
     OutSec->SectionIndex = I;
     OutputSections->push_back(OutSec);
     Cmd->Sec = OutSec;
+    SecToCommand[OutSec] = Cmd;
   }
 }
 
@@ -822,16 +830,14 @@ void LinkerScript::placeOrphanSections() {
     // If there is no command corresponding to this output section,
     // create one and put a InputSectionDescription in it so that both
     // representations agree on which input sections to use.
-    auto Pos = std::find_if(CmdIter, E, [&](BaseCommand *Base) {
-      auto *Cmd = dyn_cast<OutputSectionCommand>(Base);
-      return Cmd && Cmd->Name == Name;
-    });
-    if (Pos == E) {
-      auto *Cmd = make<OutputSectionCommand>(Name);
+    OutputSectionCommand *Cmd = getCmd(Sec);
+    if (!Cmd) {
+      Cmd = make<OutputSectionCommand>(Name);
       Opt.Commands.insert(CmdIter, Cmd);
       ++CmdIndex;
 
       Cmd->Sec = Sec;
+      SecToCommand[Sec] = Cmd;
       auto *ISD = make<InputSectionDescription>("");
       for (InputSection *IS : Sec->Sections)
         ISD->Sections.push_back(IS);
@@ -841,7 +847,11 @@ void LinkerScript::placeOrphanSections() {
     }
 
     // Continue from where we found it.
-    CmdIndex = (Pos - Opt.Commands.begin()) + 1;
+    while (*CmdIter != Cmd) {
+      ++CmdIter;
+      ++CmdIndex;
+    }
+    ++CmdIndex;
   }
 }
 
@@ -1000,7 +1010,7 @@ std::vector<PhdrEntry> LinkerScript::createPhdrs() {
       break;
 
     // Assign headers specified by linker script
-    for (size_t Id : getPhdrIndices(Sec->Name)) {
+    for (size_t Id : getPhdrIndices(Sec)) {
       Ret[Id].add(Sec);
       if (Opt.PhdrsCommands[Id].Flags == UINT_MAX)
         Ret[Id].p_flags |= Sec->getPhdrFlags();
@@ -1020,11 +1030,16 @@ bool LinkerScript::ignoreInterpSection() {
   return true;
 }
 
-Optional<uint32_t> LinkerScript::getFiller(StringRef Name) {
-  for (BaseCommand *Base : Opt.Commands)
-    if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base))
-      if (Cmd->Name == Name)
-        return Cmd->Filler;
+OutputSectionCommand *LinkerScript::getCmd(OutputSection *Sec) const {
+  auto I = SecToCommand.find(Sec);
+  if (I == SecToCommand.end())
+    return nullptr;
+  return I->second;
+}
+
+Optional<uint32_t> LinkerScript::getFiller(OutputSection *Sec) {
+  if (OutputSectionCommand *Cmd = getCmd(Sec))
+    return Cmd->Filler;
   return None;
 }
 
@@ -1042,26 +1057,16 @@ static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) {
 }
 
 void LinkerScript::writeDataBytes(OutputSection *Sec, uint8_t *Buf) {
-  auto I = std::find_if(Opt.Commands.begin(), Opt.Commands.end(),
-                        [=](BaseCommand *Base) {
-                          if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base))
-                            if (Cmd->Sec == Sec)
-                              return true;
-                          return false;
-                        });
-  if (I == Opt.Commands.end())
-    return;
-  auto *Cmd = cast<OutputSectionCommand>(*I);
-  for (BaseCommand *Base : Cmd->Commands)
-    if (auto *Data = dyn_cast<BytesDataCommand>(Base))
-      writeInt(Buf + Data->Offset, Data->Expression().getValue(), Data->Size);
+  if (OutputSectionCommand *Cmd = getCmd(Sec))
+    for (BaseCommand *Base : Cmd->Commands)
+      if (auto *Data = dyn_cast<BytesDataCommand>(Base))
+        writeInt(Buf + Data->Offset, Data->Expression().getValue(), Data->Size);
 }
 
-bool LinkerScript::hasLMA(StringRef Name) {
-  for (BaseCommand *Base : Opt.Commands)
-    if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base))
-      if (Cmd->LMAExpr && Cmd->Name == Name)
-        return true;
+bool LinkerScript::hasLMA(OutputSection *Sec) {
+  if (OutputSectionCommand *Cmd = getCmd(Sec))
+    if (Cmd->LMAExpr)
+      return true;
   return false;
 }
 
@@ -1080,15 +1085,10 @@ ExprValue LinkerScript::getSymbolValue(const Twine &Loc, StringRef S) {
 
 bool LinkerScript::isDefined(StringRef S) { return findSymbol(S) != nullptr; }
 
-// Returns indices of ELF headers containing specific section, identified
-// by Name. Each index is a zero based number of ELF header listed within
-// PHDRS {} script block.
-std::vector<size_t> LinkerScript::getPhdrIndices(StringRef SectionName) {
-  for (BaseCommand *Base : Opt.Commands) {
-    auto *Cmd = dyn_cast<OutputSectionCommand>(Base);
-    if (!Cmd || Cmd->Name != SectionName)
-      continue;
-
+// Returns indices of ELF headers containing specific section. Each index is a
+// zero based number of ELF header listed within PHDRS {} script block.
+std::vector<size_t> LinkerScript::getPhdrIndices(OutputSection *Sec) {
+  if (OutputSectionCommand *Cmd = getCmd(Sec)) {
     std::vector<size_t> Ret;
     for (StringRef PhdrName : Cmd->Phdrs)
       Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName));
diff --git a/ELF/LinkerScript.h b/ELF/LinkerScript.h
index 7bcd21c87602..d0a4d83d72b0 100644
--- a/ELF/LinkerScript.h
+++ b/ELF/LinkerScript.h
@@ -211,8 +211,9 @@ struct ScriptConfiguration {
   std::vector<llvm::StringRef> ReferencedSymbols;
 };
 
-class LinkerScript {
-protected:
+class LinkerScript final {
+  llvm::DenseMap<OutputSection *, OutputSectionCommand *> SecToCommand;
+  OutputSectionCommand *getCmd(OutputSection *Sec) const;
   void assignSymbol(SymbolAssignment *Cmd, bool InSec);
   void setDot(Expr E, const Twine &Loc, bool InSec);
 
@@ -222,7 +223,7 @@ class LinkerScript {
   std::vector<InputSectionBase *>
   createInputSectionList(OutputSectionCommand &Cmd);
 
-  std::vector<size_t> getPhdrIndices(StringRef SectionName);
+  std::vector<size_t> getPhdrIndices(OutputSection *Sec);
   size_t getPhdrIndex(const Twine &Loc, StringRef PhdrName);
 
   MemoryRegion *findMemoryRegion(OutputSectionCommand *Cmd);
@@ -262,8 +263,8 @@ class LinkerScript {
   std::vector<PhdrEntry> createPhdrs();
   bool ignoreInterpSection();
 
-  llvm::Optional<uint32_t> getFiller(StringRef Name);
-  bool hasLMA(StringRef Name);
+  llvm::Optional<uint32_t> getFiller(OutputSection *Sec);
+  bool hasLMA(OutputSection *Sec);
   bool shouldKeep(InputSectionBase *S);
   void assignOffsets(OutputSectionCommand *Cmd);
   void placeOrphanSections();
diff --git a/ELF/MapFile.cpp b/ELF/MapFile.cpp
index af5bc3c2c813..23c63e845c9a 100644
--- a/ELF/MapFile.cpp
+++ b/ELF/MapFile.cpp
@@ -84,7 +84,7 @@ template <class ELFT>
 DenseMap<DefinedRegular *, std::string>
 getSymbolStrings(ArrayRef<DefinedRegular *> Syms) {
   std::vector<std::string> Str(Syms.size());
-  parallelFor(0, Syms.size(), [&](size_t I) {
+  parallelForEachN(0, Syms.size(), [&](size_t I) {
     raw_string_ostream OS(Str[I]);
     writeHeader<ELFT>(OS, Syms[I]->getVA(), Syms[I]->template getSize<ELFT>(),
                       0);
diff --git a/ELF/OutputSections.cpp b/ELF/OutputSections.cpp
index cb9c57657af3..dcefd03766d7 100644
--- a/ELF/OutputSections.cpp
+++ b/ELF/OutputSections.cpp
@@ -133,7 +133,7 @@ template <class ELFT> void OutputSection::finalize() {
   if (isa<SyntheticSection>(First))
     return;
 
-  this->Link = In<ELFT>::SymTab->OutSec->SectionIndex;
+  this->Link = InX::SymTab->OutSec->SectionIndex;
   // sh_info for SHT_REL[A] sections should contain the section header index of
   // the section to which the relocation applies.
   InputSectionBase *S = First->getRelocatedSection();
@@ -273,7 +273,7 @@ uint32_t OutputSection::getFiller() {
   // linker script. If nothing is specified and this is an executable section,
   // fall back to trap instructions to prevent bad diassembly and detect invalid
   // jumps to padding.
-  if (Optional<uint32_t> Filler = Script->getFiller(Name))
+  if (Optional<uint32_t> Filler = Script->getFiller(this))
     return *Filler;
   if (Flags & SHF_EXECINSTR)
     return Target->TrapInstr;
@@ -297,7 +297,7 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *Buf) {
   if (Filler)
     fill(Buf, Sections.empty() ? Size : Sections[0]->OutSecOff, Filler);
 
-  parallelFor(0, Sections.size(), [=](size_t I) {
+  parallelForEachN(0, Sections.size(), [=](size_t I) {
     InputSection *Sec = Sections[I];
     Sec->writeTo<ELFT>(Buf);
 
@@ -429,8 +429,11 @@ void OutputSectionFactory::addInputSec(InputSectionBase *IS,
       if (canMergeToProgbits(Sec->Type) && canMergeToProgbits(IS->Type))
         Sec->Type = SHT_PROGBITS;
       else
-        error("Section has different type from others with the same name " +
-              toString(IS));
+        error("section type mismatch for " + IS->Name +
+              "\n>>> " + toString(IS) + ": " +
+              getELFSectionTypeName(Config->EMachine, IS->Type) +
+              "\n>>> output section " + Sec->Name + ": " +
+              getELFSectionTypeName(Config->EMachine, Sec->Type));
     }
     Sec->Flags |= Flags;
   } else {
diff --git a/ELF/OutputSections.h b/ELF/OutputSections.h
index 6405fb38c6d6..413871b60cf7 100644
--- a/ELF/OutputSections.h
+++ b/ELF/OutputSections.h
@@ -50,6 +50,7 @@ class OutputSection final : public SectionBase {
   template <typename ELFT> void writeHeaderTo(typename ELFT::Shdr *SHdr);
 
   unsigned SectionIndex;
+  unsigned SortRank;
 
   uint32_t getPhdrFlags() const;
 
diff --git a/ELF/Relocations.cpp b/ELF/Relocations.cpp
index f5db931e9755..ea7477e03842 100644
--- a/ELF/Relocations.cpp
+++ b/ELF/Relocations.cpp
@@ -106,21 +106,21 @@ static unsigned handleMipsTlsRelocation(uint32_t Type, SymbolBody &Body,
                                         InputSectionBase &C, uint64_t Offset,
                                         int64_t Addend, RelExpr Expr) {
   if (Expr == R_MIPS_TLSLD) {
-    if (In<ELFT>::MipsGot->addTlsIndex() && Config->Pic)
-      In<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, In<ELFT>::MipsGot,
-                                   In<ELFT>::MipsGot->getTlsIndexOff(), false,
+    if (InX::MipsGot->addTlsIndex() && Config->Pic)
+      In<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, InX::MipsGot,
+                                   InX::MipsGot->getTlsIndexOff(), false,
                                    nullptr, 0});
     C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
     return 1;
   }
 
   if (Expr == R_MIPS_TLSGD) {
-    if (In<ELFT>::MipsGot->addDynTlsEntry(Body) && Body.isPreemptible()) {
-      uint64_t Off = In<ELFT>::MipsGot->getGlobalDynOffset(Body);
+    if (InX::MipsGot->addDynTlsEntry(Body) && Body.isPreemptible()) {
+      uint64_t Off = InX::MipsGot->getGlobalDynOffset(Body);
       In<ELFT>::RelaDyn->addReloc(
-          {Target->TlsModuleIndexRel, In<ELFT>::MipsGot, Off, false, &Body, 0});
+          {Target->TlsModuleIndexRel, InX::MipsGot, Off, false, &Body, 0});
       if (Body.isPreemptible())
-        In<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, In<ELFT>::MipsGot,
+        In<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, InX::MipsGot,
                                      Off + Config->Wordsize, false, &Body, 0});
     }
     C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
@@ -156,17 +156,17 @@ static unsigned handleARMTlsRelocation(uint32_t Type, SymbolBody &Body,
   auto AddTlsReloc = [&](uint64_t Off, uint32_t Type, SymbolBody *Dest,
                          bool Dyn) {
     if (Dyn)
-      In<ELFT>::RelaDyn->addReloc({Type, In<ELFT>::Got, Off, false, Dest, 0});
+      In<ELFT>::RelaDyn->addReloc({Type, InX::Got, Off, false, Dest, 0});
     else
-      In<ELFT>::Got->Relocations.push_back({R_ABS, Type, Off, 0, Dest});
+      InX::Got->Relocations.push_back({R_ABS, Type, Off, 0, Dest});
   };
 
   // Local Dynamic is for access to module local TLS variables, while still
   // being suitable for being dynamically loaded via dlopen.
   // GOT[e0] is the module index, with a special value of 0 for the current
   // module. GOT[e1] is unused. There only needs to be one module index entry.
-  if (Expr == R_TLSLD_PC && In<ELFT>::Got->addTlsIndex()) {
-    AddTlsReloc(In<ELFT>::Got->getTlsIndexOff(), Target->TlsModuleIndexRel,
+  if (Expr == R_TLSLD_PC && InX::Got->addTlsIndex()) {
+    AddTlsReloc(InX::Got->getTlsIndexOff(), Target->TlsModuleIndexRel,
                 NeedDynId ? nullptr : &Body, NeedDynId);
     C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
     return 1;
@@ -176,8 +176,8 @@ static unsigned handleARMTlsRelocation(uint32_t Type, SymbolBody &Body,
   // the module index and offset of symbol in TLS block we can fill these in
   // using static GOT relocations.
   if (Expr == R_TLSGD_PC) {
-    if (In<ELFT>::Got->addDynTlsEntry(Body)) {
-      uint64_t Off = In<ELFT>::Got->getGlobalDynOffset(Body);
+    if (InX::Got->addDynTlsEntry(Body)) {
+      uint64_t Off = InX::Got->getGlobalDynOffset(Body);
       AddTlsReloc(Off, Target->TlsModuleIndexRel, &Body, NeedDynId);
       AddTlsReloc(Off + Config->Wordsize, Target->TlsOffsetRel, &Body,
                   NeedDynOff);
@@ -207,10 +207,10 @@ handleTlsRelocation(uint32_t Type, SymbolBody &Body, InputSectionBase &C,
   bool IsPreemptible = isPreemptible(Body, Type);
   if (isRelExprOneOf<R_TLSDESC, R_TLSDESC_PAGE, R_TLSDESC_CALL>(Expr) &&
       Config->Shared) {
-    if (In<ELFT>::Got->addDynTlsEntry(Body)) {
-      uint64_t Off = In<ELFT>::Got->getGlobalDynOffset(Body);
-      In<ELFT>::RelaDyn->addReloc({Target->TlsDescRel, In<ELFT>::Got, Off,
-                                   !IsPreemptible, &Body, 0});
+    if (InX::Got->addDynTlsEntry(Body)) {
+      uint64_t Off = InX::Got->getGlobalDynOffset(Body);
+      In<ELFT>::RelaDyn->addReloc(
+          {Target->TlsDescRel, InX::Got, Off, !IsPreemptible, &Body, 0});
     }
     if (Expr != R_TLSDESC_CALL)
       C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
@@ -224,10 +224,10 @@ handleTlsRelocation(uint32_t Type, SymbolBody &Body, InputSectionBase &C,
           {R_RELAX_TLS_LD_TO_LE, Type, Offset, Addend, &Body});
       return 2;
     }
-    if (In<ELFT>::Got->addTlsIndex())
-      In<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, In<ELFT>::Got,
-                                   In<ELFT>::Got->getTlsIndexOff(), false,
-                                   nullptr, 0});
+    if (InX::Got->addTlsIndex())
+      In<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, InX::Got,
+                                   InX::Got->getTlsIndexOff(), false, nullptr,
+                                   0});
     C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
     return 1;
   }
@@ -242,19 +242,19 @@ handleTlsRelocation(uint32_t Type, SymbolBody &Body, InputSectionBase &C,
   if (isRelExprOneOf<R_TLSDESC, R_TLSDESC_PAGE, R_TLSDESC_CALL, R_TLSGD,
                      R_TLSGD_PC>(Expr)) {
     if (Config->Shared) {
-      if (In<ELFT>::Got->addDynTlsEntry(Body)) {
-        uint64_t Off = In<ELFT>::Got->getGlobalDynOffset(Body);
+      if (InX::Got->addDynTlsEntry(Body)) {
+        uint64_t Off = InX::Got->getGlobalDynOffset(Body);
         In<ELFT>::RelaDyn->addReloc(
-            {Target->TlsModuleIndexRel, In<ELFT>::Got, Off, false, &Body, 0});
+            {Target->TlsModuleIndexRel, InX::Got, Off, false, &Body, 0});
 
         // If the symbol is preemptible we need the dynamic linker to write
         // the offset too.
         uint64_t OffsetOff = Off + Config->Wordsize;
         if (IsPreemptible)
-          In<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, In<ELFT>::Got,
-                                       OffsetOff, false, &Body, 0});
+          In<ELFT>::RelaDyn->addReloc(
+              {Target->TlsOffsetRel, InX::Got, OffsetOff, false, &Body, 0});
         else
-          In<ELFT>::Got->Relocations.push_back(
+          InX::Got->Relocations.push_back(
               {R_ABS, Target->TlsOffsetRel, OffsetOff, 0, &Body});
       }
       C.Relocations.push_back({Expr, Type, Offset, Addend, &Body});
@@ -268,8 +268,8 @@ handleTlsRelocation(uint32_t Type, SymbolBody &Body, InputSectionBase &C,
           {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_IE), Type,
            Offset, Addend, &Body});
       if (!Body.isInGot()) {
-        In<ELFT>::Got->addEntry(Body);
-        In<ELFT>::RelaDyn->addReloc({Target->TlsGotRel, In<ELFT>::Got,
+        InX::Got->addEntry(Body);
+        In<ELFT>::RelaDyn->addReloc({Target->TlsGotRel, InX::Got,
                                      Body.getGotOffset(), false, &Body, 0});
       }
     } else {
@@ -518,7 +518,7 @@ template <class ELFT> static void addCopyRelSymbol(SharedSymbol *SS) {
   // See if this symbol is in a read-only segment. If so, preserve the symbol's
   // memory protection by reserving space in the .bss.rel.ro section.
   bool IsReadOnly = isReadOnly<ELFT>(SS);
-  BssSection *Sec = IsReadOnly ? In<ELFT>::BssRelRo : In<ELFT>::Bss;
+  BssSection *Sec = IsReadOnly ? InX::BssRelRo : InX::Bss;
   uint64_t Off = Sec->reserveSpace(SymSize, SS->getAlignment<ELFT>());
 
   // Look through the DSO's dynamic symbol table for aliases and create a
@@ -774,7 +774,7 @@ static void addPltEntry(PltSection *Plt, GotPltSection *GotPlt,
 
 template <class ELFT>
 static void addGotEntry(SymbolBody &Sym, bool Preemptible) {
-  In<ELFT>::Got->addEntry(Sym);
+  InX::Got->addEntry(Sym);
 
   uint64_t Off = Sym.getGotOffset();
   uint32_t DynType;
@@ -792,10 +792,10 @@ static void addGotEntry(SymbolBody &Sym, bool Preemptible) {
   bool Constant = !Preemptible && !(Config->Pic && !isAbsolute(Sym));
   if (!Constant)
     In<ELFT>::RelaDyn->addReloc(
-        {DynType, In<ELFT>::Got, Off, !Preemptible, &Sym, 0});
+        {DynType, InX::Got, Off, !Preemptible, &Sym, 0});
 
   if (Constant || (!Config->IsRela && !Preemptible))
-    In<ELFT>::Got->Relocations.push_back({Expr, DynType, Off, 0, &Sym});
+    InX::Got->Relocations.push_back({Expr, DynType, Off, 0, &Sym});
 }
 
 // The reason we have to do this early scan is as follows
@@ -856,7 +856,7 @@ static void scanRelocs(InputSectionBase &Sec, ArrayRef<RelTy> Rels) {
     // needs it to be created. Here we request for that.
     if (isRelExprOneOf<R_GOTONLY_PC, R_GOTONLY_PC_FROM_END, R_GOTREL,
                        R_GOTREL_FROM_END, R_PPC_TOC>(Expr))
-      In<ELFT>::Got->HasGotOffRel = true;
+      InX::Got->HasGotOffRel = true;
 
     // Read an addend.
     int64_t Addend = computeAddend<ELFT>(Rel, Sec.Data.data());
@@ -874,11 +874,11 @@ static void scanRelocs(InputSectionBase &Sec, ArrayRef<RelTy> Rels) {
     // If a relocation needs PLT, we create PLT and GOTPLT slots for the symbol.
     if (needsPlt(Expr) && !Body.isInPlt()) {
       if (Body.isGnuIFunc() && !Preemptible)
-        addPltEntry(InX::Iplt, In<ELFT>::IgotPlt, In<ELFT>::RelaIplt,
+        addPltEntry(InX::Iplt, InX::IgotPlt, In<ELFT>::RelaIplt,
                     Target->IRelativeRel, Body, true);
       else
-        addPltEntry(InX::Plt, In<ELFT>::GotPlt, In<ELFT>::RelaPlt,
-                    Target->PltRel, Body, !Preemptible);
+        addPltEntry(InX::Plt, InX::GotPlt, In<ELFT>::RelaPlt, Target->PltRel,
+                    Body, !Preemptible);
     }
 
     // Create a GOT slot if a relocation needs GOT.
@@ -891,9 +891,9 @@ static void scanRelocs(InputSectionBase &Sec, ArrayRef<RelTy> Rels) {
         // See "Global Offset Table" in Chapter 5 in the following document
         // for detailed description:
         // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
-        In<ELFT>::MipsGot->addEntry(Body, Addend, Expr);
+        InX::MipsGot->addEntry(Body, Addend, Expr);
         if (Body.isTls() && Body.isPreemptible())
-          In<ELFT>::RelaDyn->addReloc({Target->TlsGotRel, In<ELFT>::MipsGot,
+          In<ELFT>::RelaDyn->addReloc({Target->TlsGotRel, InX::MipsGot,
                                        Body.getGotOffset(), false, &Body, 0});
       } else if (!Body.isInGot()) {
         addGotEntry<ELFT>(Body, Preemptible);
@@ -927,7 +927,7 @@ static void scanRelocs(InputSectionBase &Sec, ArrayRef<RelTy> Rels) {
       // a dynamic relocation.
       // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf p.4-19
       if (Config->EMachine == EM_MIPS)
-        In<ELFT>::MipsGot->addEntry(Body, Addend, Expr);
+        InX::MipsGot->addEntry(Body, Addend, Expr);
       continue;
     }
 
diff --git a/ELF/ScriptParser.cpp b/ELF/ScriptParser.cpp
index 032ecd50f3e3..f1bc245c9256 100644
--- a/ELF/ScriptParser.cpp
+++ b/ELF/ScriptParser.cpp
@@ -639,7 +639,7 @@ ScriptParser::readOutputSectionDescription(StringRef OutSec) {
 // We are compatible with ld.gold because it's easier to implement.
 uint32_t ScriptParser::parseFill(StringRef Tok) {
   uint32_t V = 0;
-  if (Tok.getAsInteger(0, V))
+  if (!to_integer(Tok, V))
     setError("invalid filler expression: " + Tok);
 
   uint32_t Buf;
@@ -778,23 +778,23 @@ static Optional<uint64_t> parseInt(StringRef Tok) {
 
   // Hexadecimal
   uint64_t Val;
-  if (Tok.startswith_lower("0x") && !Tok.substr(2).getAsInteger(16, Val))
+  if (Tok.startswith_lower("0x") && to_integer(Tok.substr(2), Val, 16))
     return Val;
-  if (Tok.endswith_lower("H") && !Tok.drop_back().getAsInteger(16, Val))
+  if (Tok.endswith_lower("H") && to_integer(Tok.drop_back(), Val, 16))
     return Val;
 
   // Decimal
   if (Tok.endswith_lower("K")) {
-    if (Tok.drop_back().getAsInteger(10, Val))
+    if (!to_integer(Tok.drop_back(), Val, 10))
       return None;
     return Val * 1024;
   }
   if (Tok.endswith_lower("M")) {
-    if (Tok.drop_back().getAsInteger(10, Val))
+    if (!to_integer(Tok.drop_back(), Val, 10))
       return None;
     return Val * 1024 * 1024;
   }
-  if (Tok.getAsInteger(10, Val))
+  if (!to_integer(Tok, Val, 10))
     return None;
   return Val;
 }
@@ -900,10 +900,22 @@ Expr ScriptParser::readPrimary() {
     StringRef Name = readParenLiteral();
     return [=] { return Script->isDefined(Name) ? 1 : 0; };
   }
+  if (Tok == "LENGTH") {
+    StringRef Name = readParenLiteral();
+    if (Script->Opt.MemoryRegions.count(Name) == 0)
+      setError("memory region not defined: " + Name);
+    return [=] { return Script->Opt.MemoryRegions[Name].Length; };
+  }
   if (Tok == "LOADADDR") {
     StringRef Name = readParenLiteral();
     return [=] { return Script->getOutputSection(Location, Name)->getLMA(); };
   }
+  if (Tok == "ORIGIN") {
+    StringRef Name = readParenLiteral();
+    if (Script->Opt.MemoryRegions.count(Name) == 0)
+      setError("memory region not defined: " + Name);
+    return [=] { return Script->Opt.MemoryRegions[Name].Origin; };
+  }
   if (Tok == "SEGMENT_START") {
     expect("(");
     skip();
diff --git a/ELF/Strings.cpp b/ELF/Strings.cpp
index 29760b492ba9..2e88bfba0fc1 100644
--- a/ELF/Strings.cpp
+++ b/ELF/Strings.cpp
@@ -46,7 +46,7 @@ int elf::getPriority(StringRef S) {
   if (Pos == StringRef::npos)
     return 65536;
   int V;
-  if (S.substr(Pos + 1).getAsInteger(10, V))
+  if (!to_integer(S.substr(Pos + 1), V, 10))
     return 65536;
   return V;
 }
@@ -68,7 +68,7 @@ std::vector<uint8_t> elf::parseHex(StringRef S) {
     StringRef B = S.substr(0, 2);
     S = S.substr(2);
     uint8_t H;
-    if (B.getAsInteger(16, H)) {
+    if (!to_integer(B, H, 16)) {
       error("not a hexadecimal value: " + B);
       return {};
     }
diff --git a/ELF/Symbols.cpp b/ELF/Symbols.cpp
index 2090b33e8cd6..7ce1f5354b1b 100644
--- a/ELF/Symbols.cpp
+++ b/ELF/Symbols.cpp
@@ -163,8 +163,8 @@ uint64_t SymbolBody::getVA(int64_t Addend) const {
   return OutVA + Addend;
 }
 
-template <class ELFT> typename ELFT::uint SymbolBody::getGotVA() const {
-  return In<ELFT>::Got->getVA() + getGotOffset();
+uint64_t SymbolBody::getGotVA() const {
+  return InX::Got->getVA() + getGotOffset();
 }
 
 uint64_t SymbolBody::getGotOffset() const {
@@ -370,11 +370,6 @@ std::string lld::toString(const SymbolBody &B) {
   return B.getName();
 }
 
-template uint32_t SymbolBody::template getGotVA<ELF32LE>() const;
-template uint32_t SymbolBody::template getGotVA<ELF32BE>() const;
-template uint64_t SymbolBody::template getGotVA<ELF64LE>() const;
-template uint64_t SymbolBody::template getGotVA<ELF64BE>() const;
-
 template uint32_t SymbolBody::template getSize<ELF32LE>() const;
 template uint32_t SymbolBody::template getSize<ELF32BE>() const;
 template uint64_t SymbolBody::template getSize<ELF64LE>() const;
diff --git a/ELF/Symbols.h b/ELF/Symbols.h
index 39a0c0f7b4df..030527f63744 100644
--- a/ELF/Symbols.h
+++ b/ELF/Symbols.h
@@ -78,7 +78,7 @@ class SymbolBody {
   uint64_t getVA(int64_t Addend = 0) const;
 
   uint64_t getGotOffset() const;
-  template <class ELFT> typename ELFT::uint getGotVA() const;
+  uint64_t getGotVA() const;
   uint64_t getGotPltOffset() const;
   uint64_t getGotPltVA() const;
   uint64_t getPltVA() const;
diff --git a/ELF/SyntheticSections.cpp b/ELF/SyntheticSections.cpp
index 9c585e41e9f0..5a2c2c37efd8 100644
--- a/ELF/SyntheticSections.cpp
+++ b/ELF/SyntheticSections.cpp
@@ -186,7 +186,7 @@ template <class ELFT> void MipsOptionsSection<ELFT>::writeTo(uint8_t *Buf) {
   Options->size = getSize();
 
   if (!Config->Relocatable)
-    Reginfo.ri_gp_value = In<ELFT>::MipsGot->getGp();
+    Reginfo.ri_gp_value = InX::MipsGot->getGp();
   memcpy(Buf + sizeof(Elf_Mips_Options), &Reginfo, sizeof(Reginfo));
 }
 
@@ -244,7 +244,7 @@ MipsReginfoSection<ELFT>::MipsReginfoSection(Elf_Mips_RegInfo Reginfo)
 
 template <class ELFT> void MipsReginfoSection<ELFT>::writeTo(uint8_t *Buf) {
   if (!Config->Relocatable)
-    Reginfo.ri_gp_value = In<ELFT>::MipsGot->getGp();
+    Reginfo.ri_gp_value = InX::MipsGot->getGp();
   memcpy(Buf, &Reginfo, sizeof(Reginfo));
 }
 
@@ -293,13 +293,12 @@ InputSection *elf::createInterpSection() {
   return Sec;
 }
 
-template <class ELFT>
 SymbolBody *elf::addSyntheticLocal(StringRef Name, uint8_t Type, uint64_t Value,
                                    uint64_t Size, InputSectionBase *Section) {
   auto *S = make<DefinedRegular>(Name, /*IsLocal*/ true, STV_DEFAULT, Type,
                                  Value, Size, Section, nullptr);
-  if (In<ELFT>::SymTab)
-    In<ELFT>::SymTab->addSymbol(S);
+  if (InX::SymTab)
+    InX::SymTab->addSymbol(S);
   return S;
 }
 
@@ -356,7 +355,7 @@ void BuildIdSection::computeHash(
   std::vector<uint8_t> Hashes(Chunks.size() * HashSize);
 
   // Compute hash values.
-  parallelFor(0, Chunks.size(), [&](size_t I) {
+  parallelForEachN(0, Chunks.size(), [&](size_t I) {
     HashFn(Hashes.data() + I * HashSize, Chunks[I]);
   });
 
@@ -618,17 +617,16 @@ template <class ELFT> void EhFrameSection<ELFT>::writeTo(uint8_t *Buf) {
   }
 }
 
-template <class ELFT>
-GotSection<ELFT>::GotSection()
+GotBaseSection::GotBaseSection()
     : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
                        Target->GotEntrySize, ".got") {}
 
-template <class ELFT> void GotSection<ELFT>::addEntry(SymbolBody &Sym) {
+void GotBaseSection::addEntry(SymbolBody &Sym) {
   Sym.GotIndex = NumEntries;
   ++NumEntries;
 }
 
-template <class ELFT> bool GotSection<ELFT>::addDynTlsEntry(SymbolBody &Sym) {
+bool GotBaseSection::addDynTlsEntry(SymbolBody &Sym) {
   if (Sym.GlobalDynIndex != -1U)
     return false;
   Sym.GlobalDynIndex = NumEntries;
@@ -639,7 +637,7 @@ template <class ELFT> bool GotSection<ELFT>::addDynTlsEntry(SymbolBody &Sym) {
 
 // Reserves TLS entries for a TLS module ID and a TLS block offset.
 // In total it takes two GOT slots.
-template <class ELFT> bool GotSection<ELFT>::addTlsIndex() {
+bool GotBaseSection::addTlsIndex() {
   if (TlsIndexOff != uint32_t(-1))
     return false;
   TlsIndexOff = NumEntries * Config->Wordsize;
@@ -647,21 +645,19 @@ template <class ELFT> bool GotSection<ELFT>::addTlsIndex() {
   return true;
 }
 
-template <class ELFT>
-uint64_t GotSection<ELFT>::getGlobalDynAddr(const SymbolBody &B) const {
+uint64_t GotBaseSection::getGlobalDynAddr(const SymbolBody &B) const {
   return this->getVA() + B.GlobalDynIndex * Config->Wordsize;
 }
 
-template <class ELFT>
-uint64_t GotSection<ELFT>::getGlobalDynOffset(const SymbolBody &B) const {
+uint64_t GotBaseSection::getGlobalDynOffset(const SymbolBody &B) const {
   return B.GlobalDynIndex * Config->Wordsize;
 }
 
-template <class ELFT> void GotSection<ELFT>::finalizeContents() {
+void GotBaseSection::finalizeContents() {
   Size = NumEntries * Config->Wordsize;
 }
 
-template <class ELFT> bool GotSection<ELFT>::empty() const {
+bool GotBaseSection::empty() const {
   // If we have a relocation that is relative to GOT (such as GOTOFFREL),
   // we need to emit a GOT even if it's empty.
   return NumEntries == 0 && !HasGotOffRel;
@@ -1028,24 +1024,15 @@ template <class ELFT> void DynamicSection<ELFT>::addEntries() {
   // Add strings to .dynstr early so that .dynstr's size will be
   // fixed early.
   for (StringRef S : Config->AuxiliaryList)
-    add({DT_AUXILIARY, In<ELFT>::DynStrTab->addString(S)});
+    add({DT_AUXILIARY, InX::DynStrTab->addString(S)});
   if (!Config->Rpath.empty())
     add({Config->EnableNewDtags ? DT_RUNPATH : DT_RPATH,
-         In<ELFT>::DynStrTab->addString(Config->Rpath)});
+         InX::DynStrTab->addString(Config->Rpath)});
   for (SharedFile<ELFT> *F : Symtab<ELFT>::X->getSharedFiles())
     if (F->isNeeded())
-      add({DT_NEEDED, In<ELFT>::DynStrTab->addString(F->SoName)});
+      add({DT_NEEDED, InX::DynStrTab->addString(F->SoName)});
   if (!Config->SoName.empty())
-    add({DT_SONAME, In<ELFT>::DynStrTab->addString(Config->SoName)});
-
-  if (!Config->Shared && !Config->Relocatable)
-    add({DT_DEBUG, (uint64_t)0});
-}
-
-// Add remaining entries to complete .dynamic contents.
-template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
-  if (this->Size)
-    return; // Already finalized.
+    add({DT_SONAME, InX::DynStrTab->addString(Config->SoName)});
 
   // Set DT_FLAGS and DT_FLAGS_1.
   uint32_t DtFlags = 0;
@@ -1064,15 +1051,22 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
     DtFlags |= DF_ORIGIN;
     DtFlags1 |= DF_1_ORIGIN;
   }
-  if (Config->HasStaticTlsModel)
-    DtFlags |= DF_STATIC_TLS;
 
   if (DtFlags)
     add({DT_FLAGS, DtFlags});
   if (DtFlags1)
     add({DT_FLAGS_1, DtFlags1});
 
-  this->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex;
+  if (!Config->Shared && !Config->Relocatable)
+    add({DT_DEBUG, (uint64_t)0});
+}
+
+// Add remaining entries to complete .dynamic contents.
+template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
+  if (this->Size)
+    return; // Already finalized.
+
+  this->Link = InX::DynStrTab->OutSec->SectionIndex;
   if (In<ELFT>::RelaDyn->OutSec->Size > 0) {
     bool IsRela = Config->IsRela;
     add({IsRela ? DT_RELA : DT_REL, In<ELFT>::RelaDyn});
@@ -1093,18 +1087,18 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
     add({DT_JMPREL, In<ELFT>::RelaPlt});
     add({DT_PLTRELSZ, In<ELFT>::RelaPlt->OutSec->Size});
     add({Config->EMachine == EM_MIPS ? DT_MIPS_PLTGOT : DT_PLTGOT,
-         In<ELFT>::GotPlt});
+         InX::GotPlt});
     add({DT_PLTREL, uint64_t(Config->IsRela ? DT_RELA : DT_REL)});
   }
 
-  add({DT_SYMTAB, In<ELFT>::DynSymTab});
+  add({DT_SYMTAB, InX::DynSymTab});
   add({DT_SYMENT, sizeof(Elf_Sym)});
-  add({DT_STRTAB, In<ELFT>::DynStrTab});
-  add({DT_STRSZ, In<ELFT>::DynStrTab->getSize()});
+  add({DT_STRTAB, InX::DynStrTab});
+  add({DT_STRSZ, InX::DynStrTab->getSize()});
   if (!Config->ZText)
     add({DT_TEXTREL, (uint64_t)0});
-  if (In<ELFT>::GnuHashTab)
-    add({DT_GNU_HASH, In<ELFT>::GnuHashTab});
+  if (InX::GnuHashTab)
+    add({DT_GNU_HASH, InX::GnuHashTab});
   if (In<ELFT>::HashTab)
     add({DT_HASH, In<ELFT>::HashTab});
 
@@ -1142,15 +1136,15 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
     add({DT_MIPS_RLD_VERSION, 1});
     add({DT_MIPS_FLAGS, RHF_NOTPOT});
     add({DT_MIPS_BASE_ADDRESS, Config->ImageBase});
-    add({DT_MIPS_SYMTABNO, In<ELFT>::DynSymTab->getNumSymbols()});
-    add({DT_MIPS_LOCAL_GOTNO, In<ELFT>::MipsGot->getLocalEntriesNum()});
-    if (const SymbolBody *B = In<ELFT>::MipsGot->getFirstGlobalEntry())
+    add({DT_MIPS_SYMTABNO, InX::DynSymTab->getNumSymbols()});
+    add({DT_MIPS_LOCAL_GOTNO, InX::MipsGot->getLocalEntriesNum()});
+    if (const SymbolBody *B = InX::MipsGot->getFirstGlobalEntry())
       add({DT_MIPS_GOTSYM, B->DynsymIndex});
     else
-      add({DT_MIPS_GOTSYM, In<ELFT>::DynSymTab->getNumSymbols()});
-    add({DT_PLTGOT, In<ELFT>::MipsGot});
-    if (In<ELFT>::MipsRldMap)
-      add({DT_MIPS_RLD_MAP, In<ELFT>::MipsRldMap});
+      add({DT_MIPS_GOTSYM, InX::DynSymTab->getNumSymbols()});
+    add({DT_PLTGOT, InX::MipsGot});
+    if (InX::MipsRldMap)
+      add({DT_MIPS_RLD_MAP, InX::MipsRldMap});
   }
 
   this->OutSec->Link = this->Link;
@@ -1235,11 +1229,11 @@ template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *Buf) {
     if (Config->IsRela)
       P->r_addend = Rel.getAddend();
     P->r_offset = Rel.getOffset();
-    if (Config->EMachine == EM_MIPS && Rel.getInputSec() == In<ELFT>::MipsGot)
+    if (Config->EMachine == EM_MIPS && Rel.getInputSec() == InX::MipsGot)
       // Dynamic relocation against MIPS GOT section make deal TLS entries
       // allocated in the end of the GOT. We need to adjust the offset to take
       // in account 'local' and 'global' GOT entries.
-      P->r_offset += In<ELFT>::MipsGot->getTlsOffset();
+      P->r_offset += InX::MipsGot->getTlsOffset();
     P->setSymbolAndType(Rel.getSymIndex(), Rel.Type, Config->IsMips64EL);
   }
 
@@ -1259,22 +1253,19 @@ template <class ELFT> unsigned RelocationSection<ELFT>::getRelocOffset() {
 }
 
 template <class ELFT> void RelocationSection<ELFT>::finalizeContents() {
-  this->Link = In<ELFT>::DynSymTab ? In<ELFT>::DynSymTab->OutSec->SectionIndex
-                                   : In<ELFT>::SymTab->OutSec->SectionIndex;
+  this->Link = InX::DynSymTab ? InX::DynSymTab->OutSec->SectionIndex
+                              : InX::SymTab->OutSec->SectionIndex;
 
   // Set required output section properties.
   this->OutSec->Link = this->Link;
 }
 
-template <class ELFT>
-SymbolTableSection<ELFT>::SymbolTableSection(StringTableSection &StrTabSec)
+SymbolTableBaseSection::SymbolTableBaseSection(StringTableSection &StrTabSec)
     : SyntheticSection(StrTabSec.isDynamic() ? (uint64_t)SHF_ALLOC : 0,
                        StrTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB,
                        Config->Wordsize,
                        StrTabSec.isDynamic() ? ".dynsym" : ".symtab"),
-      StrTabSec(StrTabSec) {
-  this->Entsize = sizeof(Elf_Sym);
-}
+      StrTabSec(StrTabSec) {}
 
 // Orders symbols according to their positions in the GOT,
 // in compliance with MIPS ABI rules.
@@ -1296,7 +1287,7 @@ static bool sortMipsSymbols(const SymbolTableEntry &L,
 // symbols precede global symbols, so we sort symbol entries in this
 // function. (For .dynsym, we don't do that because symbols for
 // dynamic linking are inherently all globals.)
-template <class ELFT> void SymbolTableSection<ELFT>::finalizeContents() {
+void SymbolTableBaseSection::finalizeContents() {
   this->OutSec->Link = StrTabSec.OutSec->SectionIndex;
 
   // If it is a .dynsym, there should be no local symbols, but we need
@@ -1306,9 +1297,9 @@ template <class ELFT> void SymbolTableSection<ELFT>::finalizeContents() {
     // Because the first symbol entry is a null entry, 1 is the first.
     this->OutSec->Info = 1;
 
-    if (In<ELFT>::GnuHashTab) {
+    if (InX::GnuHashTab) {
       // NB: It also sorts Symbols to meet the GNU hash table requirements.
-      In<ELFT>::GnuHashTab->addSymbols(Symbols);
+      InX::GnuHashTab->addSymbols(Symbols);
     } else if (Config->EMachine == EM_MIPS) {
       std::stable_sort(Symbols.begin(), Symbols.end(), sortMipsSymbols);
     }
@@ -1320,7 +1311,7 @@ template <class ELFT> void SymbolTableSection<ELFT>::finalizeContents() {
   }
 }
 
-template <class ELFT> void SymbolTableSection<ELFT>::postThunkContents() {
+void SymbolTableBaseSection::postThunkContents() {
   if (this->Type == SHT_DYNSYM)
     return;
   // move all local symbols before global symbols.
@@ -1333,7 +1324,7 @@ template <class ELFT> void SymbolTableSection<ELFT>::postThunkContents() {
   this->OutSec->Info = NumLocals + 1;
 }
 
-template <class ELFT> void SymbolTableSection<ELFT>::addSymbol(SymbolBody *B) {
+void SymbolTableBaseSection::addSymbol(SymbolBody *B) {
   // Adding a local symbol to a .dynsym is a bug.
   assert(this->Type != SHT_DYNSYM || !B->isLocal());
 
@@ -1341,8 +1332,7 @@ template <class ELFT> void SymbolTableSection<ELFT>::addSymbol(SymbolBody *B) {
   Symbols.push_back({B, StrTabSec.addString(B->getName(), HashIt)});
 }
 
-template <class ELFT>
-size_t SymbolTableSection<ELFT>::getSymbolIndex(SymbolBody *Body) {
+size_t SymbolTableBaseSection::getSymbolIndex(SymbolBody *Body) {
   auto I = llvm::find_if(Symbols, [&](const SymbolTableEntry &E) {
     if (E.Symbol == Body)
       return true;
@@ -1358,6 +1348,12 @@ size_t SymbolTableSection<ELFT>::getSymbolIndex(SymbolBody *Body) {
   return I - Symbols.begin() + 1;
 }
 
+template <class ELFT>
+SymbolTableSection<ELFT>::SymbolTableSection(StringTableSection &StrTabSec)
+    : SymbolTableBaseSection(StrTabSec) {
+  this->Entsize = sizeof(Elf_Sym);
+}
+
 // Write the internal symbol table contents to the output symbol table.
 template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *Buf) {
   // The first entry is a null entry as per the ELF spec.
@@ -1450,13 +1446,12 @@ template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *Buf) {
 // DSOs very quickly. If you are sure that your dynamic linker knows
 // about .gnu.hash, you want to specify -hash-style=gnu. Otherwise, a
 // safe bet is to specify -hash-style=both for backward compatibilty.
-template <class ELFT>
-GnuHashTableSection<ELFT>::GnuHashTableSection()
+GnuHashTableSection::GnuHashTableSection()
     : SyntheticSection(SHF_ALLOC, SHT_GNU_HASH, Config->Wordsize, ".gnu.hash") {
 }
 
-template <class ELFT> void GnuHashTableSection<ELFT>::finalizeContents() {
-  this->OutSec->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex;
+void GnuHashTableSection::finalizeContents() {
+  this->OutSec->Link = InX::DynSymTab->OutSec->SectionIndex;
 
   // Computes bloom filter size in word size. We want to allocate 8
   // bits for each symbol. It must be a power of two.
@@ -1471,11 +1466,10 @@ template <class ELFT> void GnuHashTableSection<ELFT>::finalizeContents() {
   Size += Symbols.size() * 4;           // Hash values
 }
 
-template <class ELFT>
-void GnuHashTableSection<ELFT>::writeTo(uint8_t *Buf) {
+void GnuHashTableSection::writeTo(uint8_t *Buf) {
   // Write a header.
   write32(Buf, NBuckets, Config->Endianness);
-  write32(Buf + 4, In<ELFT>::DynSymTab->getNumSymbols() - Symbols.size(),
+  write32(Buf + 4, InX::DynSymTab->getNumSymbols() - Symbols.size(),
           Config->Endianness);
   write32(Buf + 8, MaskWords, Config->Endianness);
   write32(Buf + 12, getShift2(), Config->Endianness);
@@ -1494,8 +1488,7 @@ void GnuHashTableSection<ELFT>::writeTo(uint8_t *Buf) {
 //
 // [1] Ulrich Drepper (2011), "How To Write Shared Libraries" (Ver. 4.1.2),
 //     p.9, https://www.akkadia.org/drepper/dsohowto.pdf
-template <class ELFT>
-void GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *Buf) {
+void GnuHashTableSection::writeBloomFilter(uint8_t *Buf) {
   const unsigned C = Config->Wordsize * 8;
   for (const Entry &Sym : Symbols) {
     size_t I = (Sym.Hash / C) & (MaskWords - 1);
@@ -1506,8 +1499,7 @@ void GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *Buf) {
   }
 }
 
-template <class ELFT>
-void GnuHashTableSection<ELFT>::writeHashTable(uint8_t *Buf) {
+void GnuHashTableSection::writeHashTable(uint8_t *Buf) {
   // Group symbols by hash value.
   std::vector<std::vector<Entry>> Syms(NBuckets);
   for (const Entry &Ent : Symbols)
@@ -1560,8 +1552,7 @@ static size_t getBucketSize(size_t NumSymbols) {
 // Add symbols to this symbol hash table. Note that this function
 // destructively sort a given vector -- which is needed because
 // GNU-style hash table places some sorting requirements.
-template <class ELFT>
-void GnuHashTableSection<ELFT>::addSymbols(std::vector<SymbolTableEntry> &V) {
+void GnuHashTableSection::addSymbols(std::vector<SymbolTableEntry> &V) {
   // We cannot use 'auto' for Mid because GCC 6.1 cannot deduce
   // its type correctly.
   std::vector<SymbolTableEntry>::iterator Mid =
@@ -1594,15 +1585,15 @@ HashTableSection<ELFT>::HashTableSection()
 }
 
 template <class ELFT> void HashTableSection<ELFT>::finalizeContents() {
-  this->OutSec->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex;
+  this->OutSec->Link = InX::DynSymTab->OutSec->SectionIndex;
 
   unsigned NumEntries = 2;                            // nbucket and nchain.
-  NumEntries += In<ELFT>::DynSymTab->getNumSymbols(); // The chain entries.
+  NumEntries += InX::DynSymTab->getNumSymbols(); // The chain entries.
 
   // Create as many buckets as there are symbols.
   // FIXME: This is simplistic. We can try to optimize it, but implementing
   // support for SHT_GNU_HASH is probably even more profitable.
-  NumEntries += In<ELFT>::DynSymTab->getNumSymbols();
+  NumEntries += InX::DynSymTab->getNumSymbols();
   this->Size = NumEntries * 4;
 }
 
@@ -1610,7 +1601,7 @@ template <class ELFT> void HashTableSection<ELFT>::writeTo(uint8_t *Buf) {
   // A 32-bit integer type in the target endianness.
   typedef typename ELFT::Word Elf_Word;
 
-  unsigned NumSymbols = In<ELFT>::DynSymTab->getNumSymbols();
+  unsigned NumSymbols = InX::DynSymTab->getNumSymbols();
 
   auto *P = reinterpret_cast<Elf_Word *>(Buf);
   *P++ = NumSymbols; // nbucket
@@ -1619,7 +1610,7 @@ template <class ELFT> void HashTableSection<ELFT>::writeTo(uint8_t *Buf) {
   Elf_Word *Buckets = P;
   Elf_Word *Chains = P + NumSymbols;
 
-  for (const SymbolTableEntry &S : In<ELFT>::DynSymTab->getSymbols()) {
+  for (const SymbolTableEntry &S : InX::DynSymTab->getSymbols()) {
     SymbolBody *Body = S.Symbol;
     StringRef Name = Body->getName();
     unsigned I = Body->DynsymIndex;
@@ -1706,13 +1697,14 @@ readCuList(DWARFContext &Dwarf, InputSection *Sec) {
   return Ret;
 }
 
-static InputSectionBase *findSection(ArrayRef<InputSectionBase *> Arr,
-                                     uint64_t Offset) {
+static InputSection *findSection(ArrayRef<InputSectionBase *> Arr,
+                                 uint64_t Offset) {
   for (InputSectionBase *S : Arr)
-    if (S && S != &InputSection::Discarded)
-      if (Offset >= S->getOffsetInFile() &&
-          Offset < S->getOffsetInFile() + S->getSize())
-        return S;
+    if (auto *IS = dyn_cast_or_null<InputSection>(S))
+      if (IS != &InputSection::Discarded && IS->Live &&
+          Offset >= IS->getOffsetInFile() &&
+          Offset < IS->getOffsetInFile() + IS->getSize())
+        return IS;
   return nullptr;
 }
 
@@ -1725,10 +1717,10 @@ readAddressArea(DWARFContext &Dwarf, InputSection *Sec, size_t CurrentCU) {
     CU->collectAddressRanges(Ranges);
 
     ArrayRef<InputSectionBase *> Sections = Sec->File->getSections();
-    for (std::pair<uint64_t, uint64_t> &R : Ranges)
-      if (InputSectionBase *S = findSection(Sections, R.first))
-        Ret.push_back({S, R.first - S->getOffsetInFile(),
-                       R.second - S->getOffsetInFile(), CurrentCU});
+    for (DWARFAddressRange &R : Ranges)
+      if (InputSection *S = findSection(Sections, R.LowPC))
+        Ret.push_back({S, R.LowPC - S->getOffsetInFile(),
+                       R.HighPC - S->getOffsetInFile(), CurrentCU});
     ++CurrentCU;
   }
   return Ret;
@@ -1951,11 +1943,11 @@ static StringRef getFileDefName() {
 }
 
 template <class ELFT> void VersionDefinitionSection<ELFT>::finalizeContents() {
-  FileDefNameOff = In<ELFT>::DynStrTab->addString(getFileDefName());
+  FileDefNameOff = InX::DynStrTab->addString(getFileDefName());
   for (VersionDefinition &V : Config->VersionDefinitions)
-    V.NameOff = In<ELFT>::DynStrTab->addString(V.Name);
+    V.NameOff = InX::DynStrTab->addString(V.Name);
 
-  this->OutSec->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex;
+  this->OutSec->Link = InX::DynStrTab->OutSec->SectionIndex;
 
   // sh_info should be set to the number of definitions. This fact is missed in
   // documentation, but confirmed by binutils community:
@@ -2008,16 +2000,16 @@ VersionTableSection<ELFT>::VersionTableSection()
 template <class ELFT> void VersionTableSection<ELFT>::finalizeContents() {
   // At the moment of june 2016 GNU docs does not mention that sh_link field
   // should be set, but Sun docs do. Also readelf relies on this field.
-  this->OutSec->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex;
+  this->OutSec->Link = InX::DynSymTab->OutSec->SectionIndex;
 }
 
 template <class ELFT> size_t VersionTableSection<ELFT>::getSize() const {
-  return sizeof(Elf_Versym) * (In<ELFT>::DynSymTab->getSymbols().size() + 1);
+  return sizeof(Elf_Versym) * (InX::DynSymTab->getSymbols().size() + 1);
 }
 
 template <class ELFT> void VersionTableSection<ELFT>::writeTo(uint8_t *Buf) {
   auto *OutVersym = reinterpret_cast<Elf_Versym *>(Buf) + 1;
-  for (const SymbolTableEntry &S : In<ELFT>::DynSymTab->getSymbols()) {
+  for (const SymbolTableEntry &S : InX::DynSymTab->getSymbols()) {
     OutVersym->vs_index = S.Symbol->symbol()->VersionId;
     ++OutVersym;
   }
@@ -2051,14 +2043,14 @@ void VersionNeedSection<ELFT>::addSymbol(SharedSymbol *SS) {
   // to create one by adding it to our needed list and creating a dynstr entry
   // for the soname.
   if (File->VerdefMap.empty())
-    Needed.push_back({File, In<ELFT>::DynStrTab->addString(File->SoName)});
+    Needed.push_back({File, InX::DynStrTab->addString(File->SoName)});
   typename SharedFile<ELFT>::NeededVer &NV = File->VerdefMap[Ver];
   // If we don't already know that we need an Elf_Vernaux for this Elf_Verdef,
   // prepare to create one by allocating a version identifier and creating a
   // dynstr entry for the version name.
   if (NV.Index == 0) {
-    NV.StrTab = In<ELFT>::DynStrTab->addString(File->getStringTable().data() +
-                                               Ver->getAux()->vda_name);
+    NV.StrTab = InX::DynStrTab->addString(File->getStringTable().data() +
+                                          Ver->getAux()->vda_name);
     NV.Index = NextIndex++;
   }
   SS->symbol()->VersionId = NV.Index;
@@ -2100,7 +2092,7 @@ template <class ELFT> void VersionNeedSection<ELFT>::writeTo(uint8_t *Buf) {
 }
 
 template <class ELFT> void VersionNeedSection<ELFT>::finalizeContents() {
-  this->OutSec->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex;
+  this->OutSec->Link = InX::DynStrTab->OutSec->SectionIndex;
   this->OutSec->Info = Needed.size();
 }
 
@@ -2187,7 +2179,7 @@ MipsRldMapSection::MipsRldMapSection()
 
 void MipsRldMapSection::writeTo(uint8_t *Buf) {
   // Apply filler from linker script.
-  Optional<uint32_t> Fill = Script->getFiller(this->Name);
+  Optional<uint32_t> Fill = Script->getFiller(this->OutSec);
   if (!Fill || *Fill == 0)
     return;
 
@@ -2245,10 +2237,14 @@ BssSection *InX::Bss;
 BssSection *InX::BssRelRo;
 BuildIdSection *InX::BuildId;
 InputSection *InX::Common;
+SyntheticSection *InX::Dynamic;
 StringTableSection *InX::DynStrTab;
+SymbolTableBaseSection *InX::DynSymTab;
 InputSection *InX::Interp;
 GdbIndexSection *InX::GdbIndex;
+GotBaseSection *InX::Got;
 GotPltSection *InX::GotPlt;
+GnuHashTableSection *InX::GnuHashTab;
 IgotPltSection *InX::IgotPlt;
 MipsGotSection *InX::MipsGot;
 MipsRldMapSection *InX::MipsRldMap;
@@ -2256,6 +2252,7 @@ PltSection *InX::Plt;
 PltSection *InX::Iplt;
 StringTableSection *InX::ShStrTab;
 StringTableSection *InX::StrTab;
+SymbolTableBaseSection *InX::SymTab;
 
 template void PltSection::addEntry<ELF32LE>(SymbolBody &Sym);
 template void PltSection::addEntry<ELF32BE>(SymbolBody &Sym);
@@ -2272,19 +2269,6 @@ template MergeInputSection *elf::createCommentSection<ELF32BE>();
 template MergeInputSection *elf::createCommentSection<ELF64LE>();
 template MergeInputSection *elf::createCommentSection<ELF64BE>();
 
-template SymbolBody *elf::addSyntheticLocal<ELF32LE>(StringRef, uint8_t,
-                                                     uint64_t, uint64_t,
-                                                     InputSectionBase *);
-template SymbolBody *elf::addSyntheticLocal<ELF32BE>(StringRef, uint8_t,
-                                                     uint64_t, uint64_t,
-                                                     InputSectionBase *);
-template SymbolBody *elf::addSyntheticLocal<ELF64LE>(StringRef, uint8_t,
-                                                     uint64_t, uint64_t,
-                                                     InputSectionBase *);
-template SymbolBody *elf::addSyntheticLocal<ELF64BE>(StringRef, uint8_t,
-                                                     uint64_t, uint64_t,
-                                                     InputSectionBase *);
-
 template class elf::MipsAbiFlagsSection<ELF32LE>;
 template class elf::MipsAbiFlagsSection<ELF32BE>;
 template class elf::MipsAbiFlagsSection<ELF64LE>;
@@ -2320,11 +2304,6 @@ template class elf::SymbolTableSection<ELF32BE>;
 template class elf::SymbolTableSection<ELF64LE>;
 template class elf::SymbolTableSection<ELF64BE>;
 
-template class elf::GnuHashTableSection<ELF32LE>;
-template class elf::GnuHashTableSection<ELF32BE>;
-template class elf::GnuHashTableSection<ELF64LE>;
-template class elf::GnuHashTableSection<ELF64BE>;
-
 template class elf::HashTableSection<ELF32LE>;
 template class elf::HashTableSection<ELF32BE>;
 template class elf::HashTableSection<ELF64LE>;
diff --git a/ELF/SyntheticSections.h b/ELF/SyntheticSections.h
index 1098c58a3baf..0477c601a7df 100644
--- a/ELF/SyntheticSections.h
+++ b/ELF/SyntheticSections.h
@@ -104,10 +104,9 @@ template <class ELFT> class EhFrameSection final : public SyntheticSection {
   llvm::DenseMap<std::pair<ArrayRef<uint8_t>, SymbolBody *>, CieRecord> CieMap;
 };
 
-template <class ELFT> class GotSection final : public SyntheticSection {
+class GotBaseSection : public SyntheticSection {
 public:
-  GotSection();
-  void writeTo(uint8_t *Buf) override;
+  GotBaseSection();
   size_t getSize() const override { return Size; }
   void finalizeContents() override;
   bool empty() const override;
@@ -125,12 +124,17 @@ template <class ELFT> class GotSection final : public SyntheticSection {
   // that relies on its address.
   bool HasGotOffRel = false;
 
-private:
+protected:
   size_t NumEntries = 0;
   uint32_t TlsIndexOff = -1;
   uint64_t Size = 0;
 };
 
+template <class ELFT> class GotSection final : public GotBaseSection {
+public:
+  void writeTo(uint8_t *Buf) override;
+};
+
 // .note.gnu.build-id section.
 class BuildIdSection : public SyntheticSection {
   // First 16 bytes are a header.
@@ -401,31 +405,35 @@ struct SymbolTableEntry {
   size_t StrTabOffset;
 };
 
-template <class ELFT> class SymbolTableSection final : public SyntheticSection {
+class SymbolTableBaseSection : public SyntheticSection {
 public:
-  typedef typename ELFT::Sym Elf_Sym;
-
-  SymbolTableSection(StringTableSection &StrTabSec);
-
+  SymbolTableBaseSection(StringTableSection &StrTabSec);
   void finalizeContents() override;
   void postThunkContents() override;
-  void writeTo(uint8_t *Buf) override;
-  size_t getSize() const override { return getNumSymbols() * sizeof(Elf_Sym); }
+  size_t getSize() const override { return getNumSymbols() * Entsize; }
   void addSymbol(SymbolBody *Body);
   unsigned getNumSymbols() const { return Symbols.size() + 1; }
   size_t getSymbolIndex(SymbolBody *Body);
   ArrayRef<SymbolTableEntry> getSymbols() const { return Symbols; }
 
-private:
+protected:
   // A vector of symbols and their string table offsets.
   std::vector<SymbolTableEntry> Symbols;
 
   StringTableSection &StrTabSec;
 };
 
+template <class ELFT>
+class SymbolTableSection final : public SymbolTableBaseSection {
+  typedef typename ELFT::Sym Elf_Sym;
+
+public:
+  SymbolTableSection(StringTableSection &StrTabSec);
+  void writeTo(uint8_t *Buf) override;
+};
+
 // Outputs GNU Hash section. For detailed explanation see:
 // https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections
-template <class ELFT>
 class GnuHashTableSection final : public SyntheticSection {
 public:
   GnuHashTableSection();
@@ -739,7 +747,7 @@ class ThunkSection : public SyntheticSection {
 template <class ELFT> InputSection *createCommonSection();
 InputSection *createInterpSection();
 template <class ELFT> MergeInputSection *createCommentSection();
-template <class ELFT>
+
 SymbolBody *addSyntheticLocal(StringRef Name, uint8_t Type, uint64_t Value,
                               uint64_t Size, InputSectionBase *Section);
 
@@ -750,9 +758,13 @@ struct InX {
   static BssSection *BssRelRo;
   static BuildIdSection *BuildId;
   static InputSection *Common;
+  static SyntheticSection *Dynamic;
   static StringTableSection *DynStrTab;
+  static SymbolTableBaseSection *DynSymTab;
+  static GnuHashTableSection *GnuHashTab;
   static InputSection *Interp;
   static GdbIndexSection *GdbIndex;
+  static GotBaseSection *Got;
   static GotPltSection *GotPlt;
   static IgotPltSection *IgotPlt;
   static MipsGotSection *MipsGot;
@@ -761,36 +773,27 @@ struct InX {
   static PltSection *Iplt;
   static StringTableSection *ShStrTab;
   static StringTableSection *StrTab;
+  static SymbolTableBaseSection *SymTab;
 };
 
 template <class ELFT> struct In : public InX {
-  static DynamicSection<ELFT> *Dynamic;
-  static SymbolTableSection<ELFT> *DynSymTab;
   static EhFrameHeader<ELFT> *EhFrameHdr;
-  static GnuHashTableSection<ELFT> *GnuHashTab;
-  static GotSection<ELFT> *Got;
   static EhFrameSection<ELFT> *EhFrame;
   static HashTableSection<ELFT> *HashTab;
   static RelocationSection<ELFT> *RelaDyn;
   static RelocationSection<ELFT> *RelaPlt;
   static RelocationSection<ELFT> *RelaIplt;
-  static SymbolTableSection<ELFT> *SymTab;
   static VersionDefinitionSection<ELFT> *VerDef;
   static VersionTableSection<ELFT> *VerSym;
   static VersionNeedSection<ELFT> *VerNeed;
 };
 
-template <class ELFT> DynamicSection<ELFT> *In<ELFT>::Dynamic;
-template <class ELFT> SymbolTableSection<ELFT> *In<ELFT>::DynSymTab;
 template <class ELFT> EhFrameHeader<ELFT> *In<ELFT>::EhFrameHdr;
-template <class ELFT> GnuHashTableSection<ELFT> *In<ELFT>::GnuHashTab;
-template <class ELFT> GotSection<ELFT> *In<ELFT>::Got;
 template <class ELFT> EhFrameSection<ELFT> *In<ELFT>::EhFrame;
 template <class ELFT> HashTableSection<ELFT> *In<ELFT>::HashTab;
 template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaDyn;
 template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaPlt;
 template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaIplt;
-template <class ELFT> SymbolTableSection<ELFT> *In<ELFT>::SymTab;
 template <class ELFT> VersionDefinitionSection<ELFT> *In<ELFT>::VerDef;
 template <class ELFT> VersionTableSection<ELFT> *In<ELFT>::VerSym;
 template <class ELFT> VersionNeedSection<ELFT> *In<ELFT>::VerNeed;
diff --git a/ELF/Target.cpp b/ELF/Target.cpp
index 4643c1a919aa..781d7fe3bc3f 100644
--- a/ELF/Target.cpp
+++ b/ELF/Target.cpp
@@ -351,15 +351,6 @@ X86TargetInfo::X86TargetInfo() {
 
 RelExpr X86TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S,
                                   const uint8_t *Loc) const {
-  // There are 4 different TLS variable models with varying degrees of
-  // flexibility and performance. LocalExec and InitialExec models are fast but
-  // less-flexible models. They cannot be used for dlopen(). If they are in use,
-  // we set DF_STATIC_TLS in the ELF header so that the runtime can reject such
-  // DSOs.
-  if (Type == R_386_TLS_LE || Type == R_386_TLS_LE_32 || Type == R_386_TLS_IE ||
-      Type == R_386_TLS_GOTIE)
-    Config->HasStaticTlsModel = true;
-
   switch (Type) {
   case R_386_8:
   case R_386_16:
@@ -429,7 +420,7 @@ RelExpr X86TargetInfo::adjustRelaxExpr(uint32_t Type, const uint8_t *Data,
 }
 
 void X86TargetInfo::writeGotPltHeader(uint8_t *Buf) const {
-  write32le(Buf, In<ELF32LE>::Dynamic->getVA());
+  write32le(Buf, InX::Dynamic->getVA());
 }
 
 void X86TargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &S) const {
@@ -460,8 +451,8 @@ void X86TargetInfo::writePltHeader(uint8_t *Buf) const {
     };
     memcpy(Buf, V, sizeof(V));
 
-    uint32_t Ebx = In<ELF32LE>::Got->getVA() + In<ELF32LE>::Got->getSize();
-    uint32_t GotPlt = In<ELF32LE>::GotPlt->getVA() - Ebx;
+    uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
+    uint32_t GotPlt = InX::GotPlt->getVA() - Ebx;
     write32le(Buf + 2, GotPlt + 4);
     write32le(Buf + 8, GotPlt + 8);
     return;
@@ -473,7 +464,7 @@ void X86TargetInfo::writePltHeader(uint8_t *Buf) const {
       0x90, 0x90, 0x90, 0x90              // nop
   };
   memcpy(Buf, PltData, sizeof(PltData));
-  uint32_t GotPlt = In<ELF32LE>::GotPlt->getVA();
+  uint32_t GotPlt = InX::GotPlt->getVA();
   write32le(Buf + 2, GotPlt + 4);
   write32le(Buf + 8, GotPlt + 8);
 }
@@ -490,7 +481,7 @@ void X86TargetInfo::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
 
   if (Config->Pic) {
     // jmp *foo@GOT(%ebx)
-    uint32_t Ebx = In<ELF32LE>::Got->getVA() + In<ELF32LE>::Got->getSize();
+    uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
     Buf[1] = 0xa3;
     write32le(Buf + 2, GotPltEntryAddr - Ebx);
   } else {
@@ -718,7 +709,7 @@ void X86_64TargetInfo<ELFT>::writeGotPltHeader(uint8_t *Buf) const {
   // required, but it is documented in the psabi and the glibc dynamic linker
   // seems to use it (note that this is relevant for linking ld.so, not any
   // other program).
-  write64le(Buf, In<ELFT>::Dynamic->getVA());
+  write64le(Buf, InX::Dynamic->getVA());
 }
 
 template <class ELFT>
@@ -736,8 +727,8 @@ void X86_64TargetInfo<ELFT>::writePltHeader(uint8_t *Buf) const {
       0x0f, 0x1f, 0x40, 0x00              // nop
   };
   memcpy(Buf, PltData, sizeof(PltData));
-  uint64_t GotPlt = In<ELFT>::GotPlt->getVA();
-  uint64_t Plt = In<ELFT>::Plt->getVA();
+  uint64_t GotPlt = InX::GotPlt->getVA();
+  uint64_t Plt = InX::Plt->getVA();
   write32le(Buf + 2, GotPlt - Plt + 2); // GOTPLT+8
   write32le(Buf + 8, GotPlt - Plt + 4); // GOTPLT+16
 }
@@ -760,7 +751,8 @@ void X86_64TargetInfo<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
 
 template <class ELFT>
 bool X86_64TargetInfo<ELFT>::isPicRel(uint32_t Type) const {
-  return Type != R_X86_64_PC32 && Type != R_X86_64_32;
+  return Type != R_X86_64_PC32 && Type != R_X86_64_32 &&
+         Type != R_X86_64_TPOFF32;
 }
 
 template <class ELFT>
@@ -1140,7 +1132,7 @@ uint64_t getPPC64TocBase() {
   // TOC starts where the first of these sections starts. We always create a
   // .got when we see a relocation that uses it, so for us the start is always
   // the .got.
-  uint64_t TocVA = In<ELF64BE>::Got->getVA();
+  uint64_t TocVA = InX::Got->getVA();
 
   // Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000
   // thus permitting a full 64 Kbytes segment. Note that the glibc startup
@@ -1369,7 +1361,7 @@ bool AArch64TargetInfo::isPicRel(uint32_t Type) const {
 }
 
 void AArch64TargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &) const {
-  write64le(Buf, In<ELF64LE>::Plt->getVA());
+  write64le(Buf, InX::Plt->getVA());
 }
 
 // Page(Expr) is the page address of the expression Expr, defined
@@ -1392,8 +1384,8 @@ void AArch64TargetInfo::writePltHeader(uint8_t *Buf) const {
   };
   memcpy(Buf, PltData, sizeof(PltData));
 
-  uint64_t Got = In<ELF64LE>::GotPlt->getVA();
-  uint64_t Plt = In<ELF64LE>::Plt->getVA();
+  uint64_t Got = InX::GotPlt->getVA();
+  uint64_t Plt = InX::Plt->getVA();
   relocateOne(Buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
               getAArch64Page(Got + 16) - getAArch64Page(Plt + 4));
   relocateOne(Buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, Got + 16);
@@ -1746,7 +1738,7 @@ uint32_t ARMTargetInfo::getDynRel(uint32_t Type) const {
 }
 
 void ARMTargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &) const {
-  write32le(Buf, In<ELF32LE>::Plt->getVA());
+  write32le(Buf, InX::Plt->getVA());
 }
 
 void ARMTargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const {
@@ -1763,15 +1755,15 @@ void ARMTargetInfo::writePltHeader(uint8_t *Buf) const {
       0x00, 0x00, 0x00, 0x00, // L2: .word   &(.got.plt) - L1 - 8
   };
   memcpy(Buf, PltData, sizeof(PltData));
-  uint64_t GotPlt = In<ELF32LE>::GotPlt->getVA();
-  uint64_t L1 = In<ELF32LE>::Plt->getVA() + 8;
+  uint64_t GotPlt = InX::GotPlt->getVA();
+  uint64_t L1 = InX::Plt->getVA() + 8;
   write32le(Buf + 16, GotPlt - L1 - 8);
 }
 
 void ARMTargetInfo::addPltHeaderSymbols(InputSectionBase *ISD) const {
   auto *IS = cast<InputSection>(ISD);
-  addSyntheticLocal<ELF32LE>("$a", STT_NOTYPE, 0, 0, IS);
-  addSyntheticLocal<ELF32LE>("$d", STT_NOTYPE, 16, 0, IS);
+  addSyntheticLocal("$a", STT_NOTYPE, 0, 0, IS);
+  addSyntheticLocal("$d", STT_NOTYPE, 16, 0, IS);
 }
 
 void ARMTargetInfo::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
@@ -1793,8 +1785,8 @@ void ARMTargetInfo::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
 
 void ARMTargetInfo::addPltSymbols(InputSectionBase *ISD, uint64_t Off) const {
   auto *IS = cast<InputSection>(ISD);
-  addSyntheticLocal<ELF32LE>("$a", STT_NOTYPE, Off, 0, IS);
-  addSyntheticLocal<ELF32LE>("$d", STT_NOTYPE, Off + 12, 0, IS);
+  addSyntheticLocal("$a", STT_NOTYPE, Off, 0, IS);
+  addSyntheticLocal("$d", STT_NOTYPE, Off + 12, 0, IS);
 }
 
 bool ARMTargetInfo::needsThunk(RelExpr Expr, uint32_t RelocType,
@@ -1874,7 +1866,8 @@ void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type,
       // BLX (always unconditional) instruction to an ARM Target, select an
       // unconditional BL.
       write32le(Loc, 0xeb000000 | (read32le(Loc) & 0x00ffffff));
-  // fall through as BL encoding is shared with B
+    // fall through as BL encoding is shared with B
+    LLVM_FALLTHROUGH;
   case R_ARM_JUMP24:
   case R_ARM_PC24:
   case R_ARM_PLT32:
@@ -1908,7 +1901,8 @@ void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type,
     }
     // Bit 12 is 0 for BLX, 1 for BL
     write16le(Loc + 2, (read16le(Loc + 2) & ~0x1000) | (Val & 1) << 12);
-  // Fall through as rest of encoding is the same as B.W
+    // Fall through as rest of encoding is the same as B.W
+    LLVM_FALLTHROUGH;
   case R_ARM_THM_JUMP24:
     // Encoding B  T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0
     // FIXME: Use of I1 and I2 require v6T2ops
@@ -2132,7 +2126,7 @@ uint32_t MipsTargetInfo<ELFT>::getDynRel(uint32_t Type) const {
 
 template <class ELFT>
 void MipsTargetInfo<ELFT>::writeGotPlt(uint8_t *Buf, const SymbolBody &) const {
-  write32<ELFT::TargetEndianness>(Buf, In<ELFT>::Plt->getVA());
+  write32<ELFT::TargetEndianness>(Buf, InX::Plt->getVA());
 }
 
 template <endianness E, uint8_t BSIZE, uint8_t SHIFT>
@@ -2201,7 +2195,7 @@ void MipsTargetInfo<ELFT>::writePltHeader(uint8_t *Buf) const {
   write32<E>(Buf + 24, 0x0320f809); // jalr  $25
   write32<E>(Buf + 28, 0x2718fffe); // subu  $24, $24, 2
 
-  uint64_t GotPlt = In<ELFT>::GotPlt->getVA();
+  uint64_t GotPlt = InX::GotPlt->getVA();
   writeMipsHi16<E>(Buf, GotPlt);
   writeMipsLo16<E>(Buf + 4, GotPlt);
   writeMipsLo16<E>(Buf + 8, GotPlt);
diff --git a/ELF/Threads.h b/ELF/Threads.h
index 897432e69f8e..e01afd4d3fc9 100644
--- a/ELF/Threads.h
+++ b/ELF/Threads.h
@@ -61,8 +61,7 @@
 
 #include "Config.h"
 
-#include "lld/Core/Parallel.h"
-#include <algorithm>
+#include "llvm/Support/Parallel.h"
 #include <functional>
 
 namespace lld {
@@ -71,19 +70,17 @@ namespace elf {
 template <class IterTy, class FuncTy>
 void parallelForEach(IterTy Begin, IterTy End, FuncTy Fn) {
   if (Config->Threads)
-    parallel_for_each(Begin, End, Fn);
+    for_each(llvm::parallel::par, Begin, End, Fn);
   else
-    std::for_each(Begin, End, Fn);
+    for_each(llvm::parallel::seq, Begin, End, Fn);
 }
 
-inline void parallelFor(size_t Begin, size_t End,
-                        std::function<void(size_t)> Fn) {
-  if (Config->Threads) {
-    parallel_for(Begin, End, Fn);
-  } else {
-    for (size_t I = Begin; I < End; ++I)
-      Fn(I);
-  }
+inline void parallelForEachN(size_t Begin, size_t End,
+                             std::function<void(size_t)> Fn) {
+  if (Config->Threads)
+    for_each_n(llvm::parallel::par, Begin, End, Fn);
+  else
+    for_each_n(llvm::parallel::seq, Begin, End, Fn);
 }
 }
 }
diff --git a/ELF/Thunks.cpp b/ELF/Thunks.cpp
index 307ca5df2288..80ea69663c01 100644
--- a/ELF/Thunks.cpp
+++ b/ELF/Thunks.cpp
@@ -124,10 +124,10 @@ void ARMV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &IS) const {
 
 template <class ELFT>
 void ARMV7ABSLongThunk<ELFT>::addSymbols(ThunkSection &IS) {
-  this->ThunkSym = addSyntheticLocal<ELFT>(
+  this->ThunkSym = addSyntheticLocal(
       Saver.save("__ARMv7ABSLongThunk_" + this->Destination.getName()),
       STT_FUNC, this->Offset, size(), &IS);
-  addSyntheticLocal<ELFT>("$a", STT_NOTYPE, this->Offset, 0, &IS);
+  addSyntheticLocal("$a", STT_NOTYPE, this->Offset, 0, &IS);
 }
 
 template <class ELFT>
@@ -145,10 +145,10 @@ void ThumbV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &IS) const {
 
 template <class ELFT>
 void ThumbV7ABSLongThunk<ELFT>::addSymbols(ThunkSection &IS) {
-  this->ThunkSym = addSyntheticLocal<ELFT>(
+  this->ThunkSym = addSyntheticLocal(
       Saver.save("__Thumbv7ABSLongThunk_" + this->Destination.getName()),
       STT_FUNC, this->Offset, size(), &IS);
-  addSyntheticLocal<ELFT>("$t", STT_NOTYPE, this->Offset, 0, &IS);
+  addSyntheticLocal("$t", STT_NOTYPE, this->Offset, 0, &IS);
 }
 
 template <class ELFT>
@@ -168,10 +168,10 @@ void ARMV7PILongThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &IS) const {
 
 template <class ELFT>
 void ARMV7PILongThunk<ELFT>::addSymbols(ThunkSection &IS) {
-  this->ThunkSym = addSyntheticLocal<ELFT>(
+  this->ThunkSym = addSyntheticLocal(
       Saver.save("__ARMV7PILongThunk_" + this->Destination.getName()), STT_FUNC,
       this->Offset, size(), &IS);
-  addSyntheticLocal<ELFT>("$a", STT_NOTYPE, this->Offset, 0, &IS);
+  addSyntheticLocal("$a", STT_NOTYPE, this->Offset, 0, &IS);
 }
 
 template <class ELFT>
@@ -191,10 +191,10 @@ void ThumbV7PILongThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &IS) const {
 
 template <class ELFT>
 void ThumbV7PILongThunk<ELFT>::addSymbols(ThunkSection &IS) {
-  this->ThunkSym = addSyntheticLocal<ELFT>(
+  this->ThunkSym = addSyntheticLocal(
       Saver.save("__ThumbV7PILongThunk_" + this->Destination.getName()),
       STT_FUNC, this->Offset, size(), &IS);
-  addSyntheticLocal<ELFT>("$t", STT_NOTYPE, this->Offset, 0, &IS);
+  addSyntheticLocal("$t", STT_NOTYPE, this->Offset, 0, &IS);
 }
 
 // Write MIPS LA25 thunk code to call PIC function from the non-PIC one.
@@ -212,7 +212,7 @@ void MipsThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &) const {
 }
 
 template <class ELFT> void MipsThunk<ELFT>::addSymbols(ThunkSection &IS) {
-  this->ThunkSym = addSyntheticLocal<ELFT>(
+  this->ThunkSym = addSyntheticLocal(
       Saver.save("__LA25Thunk_" + this->Destination.getName()), STT_FUNC,
       this->Offset, size(), &IS);
 }
diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp
index 7f00e37ce7b0..4cdfce76202c 100644
--- a/ELF/Writer.cpp
+++ b/ELF/Writer.cpp
@@ -164,11 +164,10 @@ static void combineMergableSections() {
     uint64_t Flags = MS->Flags & ~(uint64_t)(SHF_GROUP | SHF_COMPRESSED);
     uint32_t Alignment = std::max<uint32_t>(MS->Alignment, MS->Entsize);
 
-    auto I =
-        llvm::find_if(MergeSections, [=](MergeSyntheticSection *Sec) {
-          return Sec->Name == OutsecName && Sec->Flags == Flags &&
-                 Sec->Alignment == Alignment;
-        });
+    auto I = llvm::find_if(MergeSections, [=](MergeSyntheticSection *Sec) {
+      return Sec->Name == OutsecName && Sec->Flags == Flags &&
+             Sec->Alignment == Alignment;
+    });
     if (I == MergeSections.end()) {
       MergeSyntheticSection *Syn =
           make<MergeSyntheticSection>(OutsecName, MS->Type, Flags, Alignment);
@@ -312,11 +311,11 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() {
 
   auto Add = [](InputSectionBase *Sec) { InputSections.push_back(Sec); };
 
-  In<ELFT>::DynStrTab = make<StringTableSection>(".dynstr", true);
-  In<ELFT>::Dynamic = make<DynamicSection<ELFT>>();
+  InX::DynStrTab = make<StringTableSection>(".dynstr", true);
+  InX::Dynamic = make<DynamicSection<ELFT>>();
   In<ELFT>::RelaDyn = make<RelocationSection<ELFT>>(
       Config->IsRela ? ".rela.dyn" : ".rel.dyn", Config->ZCombreloc);
-  In<ELFT>::ShStrTab = make<StringTableSection>(".shstrtab", false);
+  InX::ShStrTab = make<StringTableSection>(".shstrtab", false);
 
   Out::ElfHeader = make<OutputSection>("", 0, SHF_ALLOC);
   Out::ElfHeader->Size = sizeof(Elf_Ehdr);
@@ -324,41 +323,41 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() {
   Out::ProgramHeaders->updateAlignment(Config->Wordsize);
 
   if (needsInterpSection<ELFT>()) {
-    In<ELFT>::Interp = createInterpSection();
-    Add(In<ELFT>::Interp);
+    InX::Interp = createInterpSection();
+    Add(InX::Interp);
   } else {
-    In<ELFT>::Interp = nullptr;
+    InX::Interp = nullptr;
   }
 
   if (!Config->Relocatable)
     Add(createCommentSection<ELFT>());
 
   if (Config->Strip != StripPolicy::All) {
-    In<ELFT>::StrTab = make<StringTableSection>(".strtab", false);
-    In<ELFT>::SymTab = make<SymbolTableSection<ELFT>>(*In<ELFT>::StrTab);
+    InX::StrTab = make<StringTableSection>(".strtab", false);
+    InX::SymTab = make<SymbolTableSection<ELFT>>(*InX::StrTab);
   }
 
   if (Config->BuildId != BuildIdKind::None) {
-    In<ELFT>::BuildId = make<BuildIdSection>();
-    Add(In<ELFT>::BuildId);
+    InX::BuildId = make<BuildIdSection>();
+    Add(InX::BuildId);
   }
 
-  In<ELFT>::Common = createCommonSection<ELFT>();
-  if (In<ELFT>::Common)
+  InX::Common = createCommonSection<ELFT>();
+  if (InX::Common)
     Add(InX::Common);
 
-  In<ELFT>::Bss = make<BssSection>(".bss");
-  Add(In<ELFT>::Bss);
-  In<ELFT>::BssRelRo = make<BssSection>(".bss.rel.ro");
-  Add(In<ELFT>::BssRelRo);
+  InX::Bss = make<BssSection>(".bss");
+  Add(InX::Bss);
+  InX::BssRelRo = make<BssSection>(".bss.rel.ro");
+  Add(InX::BssRelRo);
 
   // Add MIPS-specific sections.
   bool HasDynSymTab = !Symtab<ELFT>::X->getSharedFiles().empty() ||
                       Config->Pic || Config->ExportDynamic;
   if (Config->EMachine == EM_MIPS) {
     if (!Config->Shared && HasDynSymTab) {
-      In<ELFT>::MipsRldMap = make<MipsRldMapSection>();
-      Add(In<ELFT>::MipsRldMap);
+      InX::MipsRldMap = make<MipsRldMapSection>();
+      Add(InX::MipsRldMap);
     }
     if (auto *Sec = MipsAbiFlagsSection<ELFT>::create())
       Add(Sec);
@@ -369,8 +368,8 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() {
   }
 
   if (HasDynSymTab) {
-    In<ELFT>::DynSymTab = make<SymbolTableSection<ELFT>>(*In<ELFT>::DynStrTab);
-    Add(In<ELFT>::DynSymTab);
+    InX::DynSymTab = make<SymbolTableSection<ELFT>>(*InX::DynStrTab);
+    Add(InX::DynSymTab);
 
     In<ELFT>::VerSym = make<VersionTableSection<ELFT>>();
     Add(In<ELFT>::VerSym);
@@ -384,8 +383,8 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() {
     Add(In<ELFT>::VerNeed);
 
     if (Config->GnuHash) {
-      In<ELFT>::GnuHashTab = make<GnuHashTableSection<ELFT>>();
-      Add(In<ELFT>::GnuHashTab);
+      InX::GnuHashTab = make<GnuHashTableSection>();
+      Add(InX::GnuHashTab);
     }
 
     if (Config->SysvHash) {
@@ -393,29 +392,29 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() {
       Add(In<ELFT>::HashTab);
     }
 
-    Add(In<ELFT>::Dynamic);
-    Add(In<ELFT>::DynStrTab);
+    Add(InX::Dynamic);
+    Add(InX::DynStrTab);
     Add(In<ELFT>::RelaDyn);
   }
 
   // Add .got. MIPS' .got is so different from the other archs,
   // it has its own class.
   if (Config->EMachine == EM_MIPS) {
-    In<ELFT>::MipsGot = make<MipsGotSection>();
-    Add(In<ELFT>::MipsGot);
+    InX::MipsGot = make<MipsGotSection>();
+    Add(InX::MipsGot);
   } else {
-    In<ELFT>::Got = make<GotSection<ELFT>>();
-    Add(In<ELFT>::Got);
+    InX::Got = make<GotSection<ELFT>>();
+    Add(InX::Got);
   }
 
-  In<ELFT>::GotPlt = make<GotPltSection>();
-  Add(In<ELFT>::GotPlt);
-  In<ELFT>::IgotPlt = make<IgotPltSection>();
-  Add(In<ELFT>::IgotPlt);
+  InX::GotPlt = make<GotPltSection>();
+  Add(InX::GotPlt);
+  InX::IgotPlt = make<IgotPltSection>();
+  Add(InX::IgotPlt);
 
   if (Config->GdbIndex) {
-    In<ELFT>::GdbIndex = make<GdbIndexSection>();
-    Add(In<ELFT>::GdbIndex);
+    InX::GdbIndex = make<GdbIndexSection>();
+    Add(InX::GdbIndex);
   }
 
   // We always need to add rel[a].plt to output if it has entries.
@@ -431,10 +430,10 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() {
       false /*Sort*/);
   Add(In<ELFT>::RelaIplt);
 
-  In<ELFT>::Plt = make<PltSection>(Target->PltHeaderSize);
-  Add(In<ELFT>::Plt);
-  In<ELFT>::Iplt = make<PltSection>(0);
-  Add(In<ELFT>::Iplt);
+  InX::Plt = make<PltSection>(Target->PltHeaderSize);
+  Add(InX::Plt);
+  InX::Iplt = make<PltSection>(0);
+  Add(InX::Iplt);
 
   if (!Config->Relocatable) {
     if (Config->EhFrameHdr) {
@@ -445,11 +444,11 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() {
     Add(In<ELFT>::EhFrame);
   }
 
-  if (In<ELFT>::SymTab)
-    Add(In<ELFT>::SymTab);
-  Add(In<ELFT>::ShStrTab);
-  if (In<ELFT>::StrTab)
-    Add(In<ELFT>::StrTab);
+  if (InX::SymTab)
+    Add(InX::SymTab);
+  Add(InX::ShStrTab);
+  if (InX::StrTab)
+    Add(InX::StrTab);
 }
 
 static bool shouldKeepInSymtab(SectionBase *Sec, StringRef SymName,
@@ -504,7 +503,7 @@ static bool includeInSymtab(const SymbolBody &B) {
 // Local symbols are not in the linker's symbol table. This function scans
 // each object file's symbol table to copy local symbols to the output.
 template <class ELFT> void Writer<ELFT>::copyLocalSymbols() {
-  if (!In<ELFT>::SymTab)
+  if (!InX::SymTab)
     return;
   for (elf::ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) {
     for (SymbolBody *B : F->getLocalSymbols()) {
@@ -522,7 +521,7 @@ template <class ELFT> void Writer<ELFT>::copyLocalSymbols() {
       SectionBase *Sec = DR->Section;
       if (!shouldKeepInSymtab(Sec, B->getName(), *B))
         continue;
-      In<ELFT>::SymTab->addSymbol(B);
+      InX::SymTab->addSymbol(B);
     }
   }
 }
@@ -542,43 +541,17 @@ template <class ELFT> void Writer<ELFT>::addSectionSymbols() {
     auto *Sym =
         make<DefinedRegular>("", /*IsLocal=*/true, /*StOther=*/0, STT_SECTION,
                              /*Value=*/0, /*Size=*/0, IS, nullptr);
-    In<ELFT>::SymTab->addSymbol(Sym);
+    InX::SymTab->addSymbol(Sym);
   }
 }
 
-// PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections that
-// we would like to make sure appear is a specific order to maximize their
-// coverage by a single signed 16-bit offset from the TOC base pointer.
-// Conversely, the special .tocbss section should be first among all SHT_NOBITS
-// sections. This will put it next to the loaded special PPC64 sections (and,
-// thus, within reach of the TOC base pointer).
-static int getPPC64SectionRank(StringRef SectionName) {
-  return StringSwitch<int>(SectionName)
-      .Case(".tocbss", 0)
-      .Case(".branch_lt", 2)
-      .Case(".toc", 3)
-      .Case(".toc1", 4)
-      .Case(".opd", 5)
-      .Default(1);
-}
-
-// All sections with SHF_MIPS_GPREL flag should be grouped together
-// because data in these sections is addressable with a gp relative address.
-static int getMipsSectionRank(const OutputSection *S) {
-  if ((S->Flags & SHF_MIPS_GPREL) == 0)
-    return 0;
-  if (S->Name == ".got")
-    return 1;
-  return 2;
-}
-
 // Today's loaders have a feature to make segments read-only after
 // processing dynamic relocations to enhance security. PT_GNU_RELRO
 // is defined for that.
 //
 // This function returns true if a section needs to be put into a
 // PT_GNU_RELRO segment.
-template <class ELFT> bool elf::isRelroSection(const OutputSection *Sec) {
+bool elf::isRelroSection(const OutputSection *Sec) {
   if (!Config->ZRelro)
     return false;
 
@@ -613,27 +586,27 @@ template <class ELFT> bool elf::isRelroSection(const OutputSection *Sec) {
   // .got contains pointers to external symbols. They are resolved by
   // the dynamic linker when a module is loaded into memory, and after
   // that they are not expected to change. So, it can be in RELRO.
-  if (In<ELFT>::Got && Sec == In<ELFT>::Got->OutSec)
+  if (InX::Got && Sec == InX::Got->OutSec)
     return true;
 
   // .got.plt contains pointers to external function symbols. They are
   // by default resolved lazily, so we usually cannot put it into RELRO.
   // However, if "-z now" is given, the lazy symbol resolution is
   // disabled, which enables us to put it into RELRO.
-  if (Sec == In<ELFT>::GotPlt->OutSec)
+  if (Sec == InX::GotPlt->OutSec)
     return Config->ZNow;
 
   // .dynamic section contains data for the dynamic linker, and
   // there's no need to write to it at runtime, so it's better to put
   // it into RELRO.
-  if (Sec == In<ELFT>::Dynamic->OutSec)
+  if (Sec == InX::Dynamic->OutSec)
     return true;
 
   // .bss.rel.ro is used for copy relocations for read-only symbols.
   // Since the dynamic linker needs to process copy relocations, the
   // section cannot be read-only, but once initialized, they shouldn't
   // change.
-  if (Sec == In<ELFT>::BssRelRo->OutSec)
+  if (Sec == InX::BssRelRo->OutSec)
     return true;
 
   // Sections with some special names are put into RELRO. This is a
@@ -645,105 +618,149 @@ template <class ELFT> bool elf::isRelroSection(const OutputSection *Sec) {
          S == ".eh_frame" || S == ".openbsd.randomdata";
 }
 
-template <class ELFT>
-static bool compareSectionsNonScript(const OutputSection *A,
-                                     const OutputSection *B) {
-  // Put .interp first because some loaders want to see that section
-  // on the first page of the executable file when loaded into memory.
-  bool AIsInterp = A->Name == ".interp";
-  bool BIsInterp = B->Name == ".interp";
-  if (AIsInterp != BIsInterp)
-    return AIsInterp;
+// We compute a rank for each section. The rank indicates where the
+// section should be placed in the file.  Instead of using simple
+// numbers (0,1,2...), we use a series of flags. One for each decision
+// point when placing the section.
+// Using flags has two key properties:
+// * It is easy to check if a given branch was taken.
+// * It is easy to see how similar two ranks are (see getRankProximity).
+enum RankFlags {
+  RF_NOT_ADDR_SET = 1 << 16,
+  RF_NOT_INTERP = 1 << 15,
+  RF_NOT_ALLOC = 1 << 14,
+  RF_WRITE = 1 << 13,
+  RF_EXEC = 1 << 12,
+  RF_NON_TLS_BSS = 1 << 11,
+  RF_NON_TLS_BSS_RO = 1 << 10,
+  RF_NOT_TLS = 1 << 9,
+  RF_BSS = 1 << 8,
+  RF_PPC_NOT_TOCBSS = 1 << 7,
+  RF_PPC_OPD = 1 << 6,
+  RF_PPC_TOCL = 1 << 5,
+  RF_PPC_TOC = 1 << 4,
+  RF_PPC_BRANCH_LT = 1 << 3,
+  RF_MIPS_GPREL = 1 << 2,
+  RF_MIPS_NOT_GOT = 1 << 1
+};
 
-  // Allocatable sections go first to reduce the total PT_LOAD size and
-  // so debug info doesn't change addresses in actual code.
-  bool AIsAlloc = A->Flags & SHF_ALLOC;
-  bool BIsAlloc = B->Flags & SHF_ALLOC;
-  if (AIsAlloc != BIsAlloc)
-    return AIsAlloc;
-
-  // We don't have any special requirements for the relative order of two non
-  // allocatable sections.
-  if (!AIsAlloc)
-    return false;
+static unsigned getSectionRank(const OutputSection *Sec) {
+  unsigned Rank = 0;
 
   // We want to put section specified by -T option first, so we
   // can start assigning VA starting from them later.
-  auto AAddrSetI = Config->SectionStartMap.find(A->Name);
-  auto BAddrSetI = Config->SectionStartMap.find(B->Name);
-  bool AHasAddrSet = AAddrSetI != Config->SectionStartMap.end();
-  bool BHasAddrSet = BAddrSetI != Config->SectionStartMap.end();
-  if (AHasAddrSet != BHasAddrSet)
-    return AHasAddrSet;
-  if (AHasAddrSet)
-    return AAddrSetI->second < BAddrSetI->second;
+  if (Config->SectionStartMap.count(Sec->Name))
+    return Rank;
+  Rank |= RF_NOT_ADDR_SET;
+
+  // Put .interp first because some loaders want to see that section
+  // on the first page of the executable file when loaded into memory.
+  if (Sec->Name == ".interp")
+    return Rank;
+  Rank |= RF_NOT_INTERP;
+
+  // Allocatable sections go first to reduce the total PT_LOAD size and
+  // so debug info doesn't change addresses in actual code.
+  if (!(Sec->Flags & SHF_ALLOC))
+    return Rank | RF_NOT_ALLOC;
 
   // We want the read only sections first so that they go in the PT_LOAD
   // covering the program headers at the start of the file.
-  bool AIsWritable = A->Flags & SHF_WRITE;
-  bool BIsWritable = B->Flags & SHF_WRITE;
-  if (AIsWritable != BIsWritable)
-    return BIsWritable;
+  if (Sec->Flags & SHF_WRITE)
+    Rank |= RF_WRITE;
 
-  if (!Config->SingleRoRx) {
+  if (Sec->Flags & SHF_EXECINSTR) {
     // For a corresponding reason, put non exec sections first (the program
     // header PT_LOAD is not executable).
     // We only do that if we are not using linker scripts, since with linker
     // scripts ro and rx sections are in the same PT_LOAD, so their relative
     // order is not important. The same applies for -no-rosegment.
-    bool AIsExec = A->Flags & SHF_EXECINSTR;
-    bool BIsExec = B->Flags & SHF_EXECINSTR;
-    if (AIsExec != BIsExec)
-      return BIsExec;
+    if ((Rank & RF_WRITE) || !Config->SingleRoRx)
+      Rank |= RF_EXEC;
   }
 
   // If we got here we know that both A and B are in the same PT_LOAD.
 
-  bool AIsTls = A->Flags & SHF_TLS;
-  bool BIsTls = B->Flags & SHF_TLS;
-  bool AIsNoBits = A->Type == SHT_NOBITS;
-  bool BIsNoBits = B->Type == SHT_NOBITS;
+  bool IsTls = Sec->Flags & SHF_TLS;
+  bool IsNoBits = Sec->Type == SHT_NOBITS;
 
   // The first requirement we have is to put (non-TLS) nobits sections last. The
   // reason is that the only thing the dynamic linker will see about them is a
   // p_memsz that is larger than p_filesz. Seeing that it zeros the end of the
   // PT_LOAD, so that has to correspond to the nobits sections.
-  bool AIsNonTlsNoBits = AIsNoBits && !AIsTls;
-  bool BIsNonTlsNoBits = BIsNoBits && !BIsTls;
-  if (AIsNonTlsNoBits != BIsNonTlsNoBits)
-    return BIsNonTlsNoBits;
+  bool IsNonTlsNoBits = IsNoBits && !IsTls;
+  if (IsNonTlsNoBits)
+    Rank |= RF_NON_TLS_BSS;
 
   // We place nobits RelRo sections before plain r/w ones, and non-nobits RelRo
   // sections after r/w ones, so that the RelRo sections are contiguous.
-  bool AIsRelRo = isRelroSection<ELFT>(A);
-  bool BIsRelRo = isRelroSection<ELFT>(B);
-  if (AIsRelRo != BIsRelRo)
-    return AIsNonTlsNoBits ? AIsRelRo : BIsRelRo;
+  bool IsRelRo = isRelroSection(Sec);
+  if (IsNonTlsNoBits && !IsRelRo)
+    Rank |= RF_NON_TLS_BSS_RO;
+  if (!IsNonTlsNoBits && IsRelRo)
+    Rank |= RF_NON_TLS_BSS_RO;
 
   // The TLS initialization block needs to be a single contiguous block in a R/W
   // PT_LOAD, so stick TLS sections directly before the other RelRo R/W
   // sections. The TLS NOBITS sections are placed here as they don't take up
   // virtual address space in the PT_LOAD.
-  if (AIsTls != BIsTls)
-    return AIsTls;
+  if (!IsTls)
+    Rank |= RF_NOT_TLS;
 
   // Within the TLS initialization block, the non-nobits sections need to appear
   // first.
-  if (AIsNoBits != BIsNoBits)
-    return BIsNoBits;
+  if (IsNoBits)
+    Rank |= RF_BSS;
 
-  // Some architectures have additional ordering restrictions for sections
-  // within the same PT_LOAD.
-  if (Config->EMachine == EM_PPC64)
-    return getPPC64SectionRank(A->Name) < getPPC64SectionRank(B->Name);
-  if (Config->EMachine == EM_MIPS)
-    return getMipsSectionRank(A) < getMipsSectionRank(B);
+  // Some architectures have additional ordering restrictions for sections
+  // within the same PT_LOAD.
+  if (Config->EMachine == EM_PPC64) {
+    // PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections
+    // that we would like to make sure appear in a specific order to maximize
+    // their coverage by a single signed 16-bit offset from the TOC base
+    // pointer. Conversely, the special .tocbss section should be first among
+    // all SHT_NOBITS sections. This will put it next to the loaded special
+    // PPC64 sections (and, thus, within reach of the TOC base pointer).
+    StringRef Name = Sec->Name;
+    if (Name != ".tocbss")
+      Rank |= RF_PPC_NOT_TOCBSS;
 
+    if (Name == ".opd")
+      Rank |= RF_PPC_OPD;
+
+    if (Name == ".toc1")
+      Rank |= RF_PPC_TOCL;
+
+    if (Name == ".toc")
+      Rank |= RF_PPC_TOC;
+
+    if (Name == ".branch_lt")
+      Rank |= RF_PPC_BRANCH_LT;
+  }
+  if (Config->EMachine == EM_MIPS) {
+    // All sections with SHF_MIPS_GPREL flag should be grouped together
+    // because data in these sections is addressable with a gp relative address.
+    if (Sec->Flags & SHF_MIPS_GPREL)
+      Rank |= RF_MIPS_GPREL;
+
+    if (Sec->Name != ".got")
+      Rank |= RF_MIPS_NOT_GOT;
+  }
+
+  return Rank;
+}
+
+static bool compareSectionsNonScript(const OutputSection *A,
+                                     const OutputSection *B) {
+  if (A->SortRank != B->SortRank)
+    return A->SortRank < B->SortRank;
+  if (!(A->SortRank & RF_NOT_ADDR_SET))
+    return Config->SectionStartMap.lookup(A->Name) <
+           Config->SectionStartMap.lookup(B->Name);
   return false;
 }
 
 // Output section ordering is determined by this function.
-template <class ELFT>
 static bool compareSections(const OutputSection *A, const OutputSection *B) {
   // For now, put sections mentioned in a linker script
   // first. Sections not on linker script will have a SectionIndex of
@@ -753,7 +770,7 @@ static bool compareSections(const OutputSection *A, const OutputSection *B) {
   if (AIndex != BIndex)
     return AIndex < BIndex;
 
-  return compareSectionsNonScript<ELFT>(A, B);
+  return compareSectionsNonScript(A, B);
 }
 
 // Program header entry
@@ -802,7 +819,7 @@ addOptionalRegular(StringRef Name, SectionBase *Sec, uint64_t Val,
 // need these symbols, since IRELATIVE relocs are resolved through GOT
 // and PLT. For details, see http://www.airs.com/blog/archives/403.
 template <class ELFT> void Writer<ELFT>::addRelIpltSymbols() {
-  if (In<ELFT>::DynSymTab)
+  if (InX::DynSymTab)
     return;
   StringRef S = Config->IsRela ? "__rela_iplt_start" : "__rel_iplt_start";
   addOptionalRegular<ELFT>(S, In<ELFT>::RelaIplt, 0, STV_HIDDEN, STB_WEAK);
@@ -855,16 +872,19 @@ template <class ELFT> void Writer<ELFT>::addReservedSymbols() {
   // static linking the linker is required to optimize away any references to
   // __tls_get_addr, so it's not defined anywhere. Create a hidden definition
   // to avoid the undefined symbol error.
-  if (!In<ELFT>::DynSymTab)
+  if (!InX::DynSymTab)
     Symtab<ELFT>::X->addIgnored("__tls_get_addr");
 
+  // __ehdr_start is the location of ELF file headers. Note that we define
+  // this symbol unconditionally even when using a linker script, which
+  // differs from the behavior implemented by GNU linker which only defines
+  // this symbol if ELF headers are in the memory mapped segment.
+  addOptionalRegular<ELFT>("__ehdr_start", Out::ElfHeader, 0, STV_HIDDEN);
+
   // If linker script do layout we do not need to create any standart symbols.
   if (Script->Opt.HasSections)
     return;
 
-  // __ehdr_start is the location of ELF file headers.
-  addOptionalRegular<ELFT>("__ehdr_start", Out::ElfHeader, 0, STV_HIDDEN);
-
   auto Add = [](StringRef S) {
     return addOptionalRegular<ELFT>(S, Out::ElfHeader, 0, STV_DEFAULT);
   };
@@ -960,18 +980,36 @@ template <class ELFT> void Writer<ELFT>::createSections() {
     Sec->assignOffsets();
 }
 
-static bool canSharePtLoad(const OutputSection &S1, const OutputSection &S2) {
-  if (!(S1.Flags & SHF_ALLOC) || !(S2.Flags & SHF_ALLOC))
-    return false;
+// We want to find how similar two ranks are.
+// The more branches in getSectionRank that match, the more similar they are.
+// Since each branch corresponds to a bit flag, we can just use
+// countLeadingZeros.
+static unsigned getRankProximity(OutputSection *A, OutputSection *B) {
+  return countLeadingZeros(A->SortRank ^ B->SortRank);
+}
 
-  bool S1IsWrite = S1.Flags & SHF_WRITE;
-  bool S2IsWrite = S2.Flags & SHF_WRITE;
-  if (S1IsWrite != S2IsWrite)
-    return false;
+// We want to place orphan sections so that they share as many
+// characteristics with their neighbors as possible. For example, if
+// both are rw, or both are tls.
+template <typename ELFT>
+static std::vector<OutputSection *>::iterator
+findOrphanPos(std::vector<OutputSection *>::iterator B,
+              std::vector<OutputSection *>::iterator E) {
+  OutputSection *Sec = *E;
 
-  if (!S1IsWrite)
-    return true; // RO and RX share a PT_LOAD with linker scripts.
-  return (S1.Flags & SHF_EXECINSTR) == (S2.Flags & SHF_EXECINSTR);
+  // Find the first element that has as close a rank as possible.
+  auto I = std::max_element(B, E, [=](OutputSection *A, OutputSection *B) {
+    return getRankProximity(Sec, A) < getRankProximity(Sec, B);
+  });
+  if (I == E)
+    return E;
+
+  // Consider all existing sections with the same proximity.
+  unsigned Proximity = getRankProximity(Sec, *I);
+  while (I != E && getRankProximity(Sec, *I) == Proximity &&
+         Sec->SortRank >= (*I)->SortRank)
+    ++I;
+  return I;
 }
 
 template <class ELFT> void Writer<ELFT>::sortSections() {
@@ -979,12 +1017,18 @@ template <class ELFT> void Writer<ELFT>::sortSections() {
   // relative order for SHF_LINK_ORDER sections.
   if (Config->Relocatable)
     return;
+
+  if (Script->Opt.HasSections)
+    Script->adjustSectionsBeforeSorting();
+
+  for (OutputSection *Sec : OutputSections)
+    Sec->SortRank = getSectionRank(Sec);
+
   if (!Script->Opt.HasSections) {
     std::stable_sort(OutputSections.begin(), OutputSections.end(),
-                     compareSectionsNonScript<ELFT>);
+                     compareSectionsNonScript);
     return;
   }
-  Script->adjustSectionsBeforeSorting();
 
   // The order of the sections in the script is arbitrary and may not agree with
   // compareSectionsNonScript. This means that we cannot easily define a
@@ -1004,14 +1048,13 @@ template <class ELFT> void Writer<ELFT>::sortSections() {
   //   .d (ro) # not in script
   //
   // The way we define an order then is:
-  // *  First put script sections at the start and sort the script and
-  //    non-script sections independently.
+  // *  First put script sections at the start and sort the script sections.
   // *  Move each non-script section to its preferred position. We try
   //    to put each section in the last position where it it can share
   //    a PT_LOAD.
 
   std::stable_sort(OutputSections.begin(), OutputSections.end(),
-                   compareSections<ELFT>);
+                   compareSections);
 
   auto I = OutputSections.begin();
   auto E = OutputSections.end();
@@ -1019,31 +1062,16 @@ template <class ELFT> void Writer<ELFT>::sortSections() {
       std::find_if(OutputSections.begin(), E,
                    [](OutputSection *S) { return S->SectionIndex == INT_MAX; });
   while (NonScriptI != E) {
-    auto BestPos = std::max_element(
-        I, NonScriptI, [&](OutputSection *&A, OutputSection *&B) {
-          bool ACanSharePtLoad = canSharePtLoad(**NonScriptI, *A);
-          bool BCanSharePtLoad = canSharePtLoad(**NonScriptI, *B);
-          if (ACanSharePtLoad != BCanSharePtLoad)
-            return BCanSharePtLoad;
+    auto Pos = findOrphanPos<ELFT>(I, NonScriptI);
 
-          bool ACmp = compareSectionsNonScript<ELFT>(*NonScriptI, A);
-          bool BCmp = compareSectionsNonScript<ELFT>(*NonScriptI, B);
-          if (ACmp != BCmp)
-            return BCmp; // FIXME: missing test
-
-          size_t PosA = &A - &OutputSections[0];
-          size_t PosB = &B - &OutputSections[0];
-          return ACmp ? PosA > PosB : PosA < PosB;
-        });
-
-    // max_element only returns NonScriptI if the range is empty. If the range
-    // is not empty we should consider moving the the element forward one
-    // position.
-    if (BestPos != NonScriptI &&
-        !compareSectionsNonScript<ELFT>(*NonScriptI, *BestPos))
-      ++BestPos;
-    std::rotate(BestPos, NonScriptI, NonScriptI + 1);
-    ++NonScriptI;
+    // As an optimization, find all sections with the same sort rank
+    // and insert them with one rotate.
+    unsigned Rank = (*NonScriptI)->SortRank;
+    auto End = std::find_if(NonScriptI + 1, E, [=](OutputSection *Sec) {
+      return Sec->SortRank != Rank;
+    });
+    std::rotate(Pos, NonScriptI, End);
+    NonScriptI = End;
   }
 
   Script->adjustSectionsAfterSorting();
@@ -1103,8 +1131,8 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
   // It should be okay as no one seems to care about the type.
   // Even the author of gold doesn't remember why gold behaves that way.
   // https://sourceware.org/ml/binutils/2002-03/msg00360.html
-  if (In<ELFT>::DynSymTab)
-    addRegular<ELFT>("_DYNAMIC", In<ELFT>::Dynamic, 0);
+  if (InX::DynSymTab)
+    addRegular<ELFT>("_DYNAMIC", InX::Dynamic, 0);
 
   // Define __rel[a]_iplt_{start,end} symbols if needed.
   addRelIpltSymbols();
@@ -1119,10 +1147,10 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
   // we can correctly decide if a dynamic relocation is needed.
   forEachRelSec(scanRelocations<ELFT>);
 
-  if (In<ELFT>::Plt && !In<ELFT>::Plt->empty())
-    In<ELFT>::Plt->addSymbols();
-  if (In<ELFT>::Iplt && !In<ELFT>::Iplt->empty())
-    In<ELFT>::Iplt->addSymbols();
+  if (InX::Plt && !InX::Plt->empty())
+    InX::Plt->addSymbols();
+  if (InX::Iplt && !InX::Iplt->empty())
+    InX::Iplt->addSymbols();
 
   // Now that we have defined all possible global symbols including linker-
   // synthesized ones. Visit all symbols to give the finishing touches.
@@ -1131,11 +1159,11 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
 
     if (!includeInSymtab(*Body))
       continue;
-    if (In<ELFT>::SymTab)
-      In<ELFT>::SymTab->addSymbol(Body);
+    if (InX::SymTab)
+      InX::SymTab->addSymbol(Body);
 
-    if (In<ELFT>::DynSymTab && S->includeInDynsym()) {
-      In<ELFT>::DynSymTab->addSymbol(Body);
+    if (InX::DynSymTab && S->includeInDynsym()) {
+      InX::DynSymTab->addSymbol(Body);
       if (auto *SS = dyn_cast<SharedSymbol>(Body))
         if (cast<SharedFile<ELFT>>(SS->File)->isNeeded())
           In<ELFT>::VerNeed->addSymbol(SS);
@@ -1161,7 +1189,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
   unsigned I = 1;
   for (OutputSection *Sec : OutputSections) {
     Sec->SectionIndex = I++;
-    Sec->ShName = In<ELFT>::ShStrTab->addString(Sec->Name);
+    Sec->ShName = InX::ShStrTab->addString(Sec->Name);
   }
 
   // Binary and relocatable output does not have PHDRS.
@@ -1175,15 +1203,14 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
 
   // Dynamic section must be the last one in this list and dynamic
   // symbol table section (DynSymTab) must be the first one.
-  applySynthetic({In<ELFT>::DynSymTab,  In<ELFT>::Bss,      In<ELFT>::BssRelRo,
-                  In<ELFT>::GnuHashTab, In<ELFT>::HashTab,  In<ELFT>::SymTab,
-                  In<ELFT>::ShStrTab,   In<ELFT>::StrTab,   In<ELFT>::VerDef,
-                  In<ELFT>::DynStrTab,  In<ELFT>::GdbIndex, In<ELFT>::Got,
-                  In<ELFT>::MipsGot,    In<ELFT>::IgotPlt,  In<ELFT>::GotPlt,
-                  In<ELFT>::RelaDyn,    In<ELFT>::RelaIplt, In<ELFT>::RelaPlt,
-                  In<ELFT>::Plt,        In<ELFT>::Iplt,     In<ELFT>::Plt,
-                  In<ELFT>::EhFrameHdr, In<ELFT>::VerSym,   In<ELFT>::VerNeed,
-                  In<ELFT>::Dynamic},
+  applySynthetic({InX::DynSymTab,    InX::Bss,           InX::BssRelRo,
+                  InX::GnuHashTab,   In<ELFT>::HashTab,  InX::SymTab,
+                  InX::ShStrTab,     InX::StrTab,        In<ELFT>::VerDef,
+                  InX::DynStrTab,    InX::GdbIndex,      InX::Got,
+                  InX::MipsGot,      InX::IgotPlt,       InX::GotPlt,
+                  In<ELFT>::RelaDyn, In<ELFT>::RelaIplt, In<ELFT>::RelaPlt,
+                  InX::Plt,          InX::Iplt,          In<ELFT>::EhFrameHdr,
+                  In<ELFT>::VerSym,  In<ELFT>::VerNeed,  InX::Dynamic},
                  [](SyntheticSection *SS) { SS->finalizeContents(); });
 
   // Some architectures use small displacements for jump instructions.
@@ -1198,7 +1225,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
     // when no more Thunks are added
     ThunkCreator<ELFT> TC;
     if (TC.createThunks(OutputSections))
-      applySynthetic({In<ELFT>::MipsGot},
+      applySynthetic({InX::MipsGot},
                      [](SyntheticSection *SS) { SS->updateAllocSize(); });
   }
   // Fill other section headers. The dynamic table is finalized
@@ -1214,7 +1241,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
                   [](OutputSection *S) { S->maybeCompress<ELFT>(); });
 
   // createThunks may have added local symbols to the static symbol table
-  applySynthetic({In<ELFT>::SymTab, In<ELFT>::ShStrTab, In<ELFT>::StrTab},
+  applySynthetic({InX::SymTab, InX::ShStrTab, InX::StrTab},
                  [](SyntheticSection *SS) { SS->postThunkContents(); });
 }
 
@@ -1332,7 +1359,7 @@ template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() {
     // different flags or is loaded at a discontiguous address using AT linker
     // script command.
     uint64_t NewFlags = computeFlags(Sec->getPhdrFlags());
-    if (Script->hasLMA(Sec->Name) || Flags != NewFlags) {
+    if (Script->hasLMA(Sec) || Flags != NewFlags) {
       Load = AddHdr(PT_LOAD, NewFlags);
       Flags = NewFlags;
     }
@@ -1349,15 +1376,15 @@ template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() {
     Ret.push_back(std::move(TlsHdr));
 
   // Add an entry for .dynamic.
-  if (In<ELFT>::DynSymTab)
-    AddHdr(PT_DYNAMIC, In<ELFT>::Dynamic->OutSec->getPhdrFlags())
-        ->add(In<ELFT>::Dynamic->OutSec);
+  if (InX::DynSymTab)
+    AddHdr(PT_DYNAMIC, InX::Dynamic->OutSec->getPhdrFlags())
+        ->add(InX::Dynamic->OutSec);
 
   // PT_GNU_RELRO includes all sections that should be marked as
   // read-only by dynamic linker after proccessing relocations.
   PhdrEntry RelRo(PT_GNU_RELRO, PF_R);
   for (OutputSection *Sec : OutputSections)
-    if (needsPtLoad(Sec) && isRelroSection<ELFT>(Sec))
+    if (needsPtLoad(Sec) && isRelroSection(Sec))
       RelRo.add(Sec);
   if (RelRo.First)
     Ret.push_back(std::move(RelRo));
@@ -1395,7 +1422,7 @@ template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() {
   PhdrEntry *Note = nullptr;
   for (OutputSection *Sec : OutputSections) {
     if (Sec->Type == SHT_NOTE) {
-      if (!Note || Script->hasLMA(Sec->Name))
+      if (!Note || Script->hasLMA(Sec))
         Note = AddHdr(PT_NOTE, PF_R);
       Note->add(Sec);
     } else {
@@ -1547,7 +1574,7 @@ template <class ELFT> uint64_t Writer<ELFT>::getEntryAddr() {
   if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Entry))
     return B->getVA();
   uint64_t Addr;
-  if (!Config->Entry.getAsInteger(0, Addr))
+  if (to_integer(Config->Entry, Addr))
     return Addr;
 
   // Case 4
@@ -1649,7 +1676,7 @@ template <class ELFT> void Writer<ELFT>::writeHeader() {
   EHdr->e_phnum = Phdrs.size();
   EHdr->e_shentsize = sizeof(Elf_Shdr);
   EHdr->e_shnum = OutputSections.size() + 1;
-  EHdr->e_shstrndx = In<ELFT>::ShStrTab->OutSec->SectionIndex;
+  EHdr->e_shstrndx = InX::ShStrTab->OutSec->SectionIndex;
 
   if (Config->EMachine == EM_ARM)
     // We don't currently use any features incompatible with EF_ARM_EABI_VER5,
@@ -1743,21 +1770,16 @@ template <class ELFT> void Writer<ELFT>::writeSections() {
 }
 
 template <class ELFT> void Writer<ELFT>::writeBuildId() {
-  if (!In<ELFT>::BuildId || !In<ELFT>::BuildId->OutSec)
+  if (!InX::BuildId || !InX::BuildId->OutSec)
     return;
 
   // Compute a hash of all sections of the output file.
   uint8_t *Start = Buffer->getBufferStart();
   uint8_t *End = Start + FileSize;
-  In<ELFT>::BuildId->writeBuildId({Start, End});
+  InX::BuildId->writeBuildId({Start, End});
 }
 
 template void elf::writeResult<ELF32LE>();
 template void elf::writeResult<ELF32BE>();
 template void elf::writeResult<ELF64LE>();
 template void elf::writeResult<ELF64BE>();
-
-template bool elf::isRelroSection<ELF32LE>(const OutputSection *);
-template bool elf::isRelroSection<ELF32BE>(const OutputSection *);
-template bool elf::isRelroSection<ELF64LE>(const OutputSection *);
-template bool elf::isRelroSection<ELF64BE>(const OutputSection *);
diff --git a/ELF/Writer.h b/ELF/Writer.h
index 8b965f7beddb..17fbda394a20 100644
--- a/ELF/Writer.h
+++ b/ELF/Writer.h
@@ -24,7 +24,7 @@ template <class ELFT> class ObjectFile;
 template <class ELFT> class SymbolTable;
 template <class ELFT> void writeResult();
 template <class ELFT> void markLive();
-template <class ELFT> bool isRelroSection(const OutputSection *Sec);
+bool isRelroSection(const OutputSection *Sec);
 
 // This describes a program header entry.
 // Each contains type, access flags and range of output sections that will be
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index d4f3b058efb7..112ce35e8cf4 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -1,6 +1,6 @@
 if (LLVM_ENABLE_SPHINX)
+  include(AddSphinxTarget)
   if (SPHINX_FOUND)
-    include(AddSphinxTarget)
     if (${SPHINX_OUTPUT_HTML})
       add_sphinx_target(html lld)
     endif()
diff --git a/include/lld/Core/Parallel.h b/include/lld/Core/Parallel.h
deleted file mode 100644
index 58fa87e85c51..000000000000
--- a/include/lld/Core/Parallel.h
+++ /dev/null
@@ -1,166 +0,0 @@
-//===- lld/Core/Parallel.h - Parallel utilities ---------------------------===//
-//
-//                             The LLVM Linker
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLD_CORE_PARALLEL_H
-#define LLD_CORE_PARALLEL_H
-
-#include "lld/Core/LLVM.h"
-#include "lld/Core/TaskGroup.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Config/llvm-config.h"
-
-#include <algorithm>
-
-#if defined(_MSC_VER) && LLVM_ENABLE_THREADS
-#include <concrt.h>
-#include <ppl.h>
-#endif
-
-namespace lld {
-
-#if !LLVM_ENABLE_THREADS
-template <class RandomAccessIterator, class Comparator>
-void parallel_sort(
-    RandomAccessIterator Start, RandomAccessIterator End,
-    const Comparator &Comp = std::less<
-        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
-  std::sort(Start, End, Comp);
-}
-#elif defined(_MSC_VER)
-// Use ppl parallel_sort on Windows.
-template <class RandomAccessIterator, class Comparator>
-void parallel_sort(
-    RandomAccessIterator Start, RandomAccessIterator End,
-    const Comparator &Comp = std::less<
-        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
-  concurrency::parallel_sort(Start, End, Comp);
-}
-#else
-namespace detail {
-const ptrdiff_t MinParallelSize = 1024;
-
-/// \brief Inclusive median.
-template <class RandomAccessIterator, class Comparator>
-RandomAccessIterator medianOf3(RandomAccessIterator Start,
-                               RandomAccessIterator End,
-                               const Comparator &Comp) {
-  RandomAccessIterator Mid = Start + (std::distance(Start, End) / 2);
-  return Comp(*Start, *(End - 1))
-             ? (Comp(*Mid, *(End - 1)) ? (Comp(*Start, *Mid) ? Mid : Start)
-                                       : End - 1)
-             : (Comp(*Mid, *Start) ? (Comp(*(End - 1), *Mid) ? Mid : End - 1)
-                                   : Start);
-}
-
-template <class RandomAccessIterator, class Comparator>
-void parallel_quick_sort(RandomAccessIterator Start, RandomAccessIterator End,
-                         const Comparator &Comp, TaskGroup &TG, size_t Depth) {
-  // Do a sequential sort for small inputs.
-  if (std::distance(Start, End) < detail::MinParallelSize || Depth == 0) {
-    std::sort(Start, End, Comp);
-    return;
-  }
-
-  // Partition.
-  auto Pivot = medianOf3(Start, End, Comp);
-  // Move Pivot to End.
-  std::swap(*(End - 1), *Pivot);
-  Pivot = std::partition(Start, End - 1, [&Comp, End](decltype(*Start) V) {
-    return Comp(V, *(End - 1));
-  });
-  // Move Pivot to middle of partition.
-  std::swap(*Pivot, *(End - 1));
-
-  // Recurse.
-  TG.spawn([=, &Comp, &TG] {
-    parallel_quick_sort(Start, Pivot, Comp, TG, Depth - 1);
-  });
-  parallel_quick_sort(Pivot + 1, End, Comp, TG, Depth - 1);
-}
-}
-
-template <class RandomAccessIterator, class Comparator>
-void parallel_sort(
-    RandomAccessIterator Start, RandomAccessIterator End,
-    const Comparator &Comp = std::less<
-        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
-  TaskGroup TG;
-  detail::parallel_quick_sort(Start, End, Comp, TG,
-                              llvm::Log2_64(std::distance(Start, End)) + 1);
-}
-#endif
-
-template <class T> void parallel_sort(T *Start, T *End) {
-  parallel_sort(Start, End, std::less<T>());
-}
-
-#if !LLVM_ENABLE_THREADS
-template <class IterTy, class FuncTy>
-void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
-  std::for_each(Begin, End, Fn);
-}
-
-template <class IndexTy, class FuncTy>
-void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
-  for (IndexTy I = Begin; I != End; ++I)
-    Fn(I);
-}
-#elif defined(_MSC_VER)
-// Use ppl parallel_for_each on Windows.
-template <class IterTy, class FuncTy>
-void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
-  concurrency::parallel_for_each(Begin, End, Fn);
-}
-
-template <class IndexTy, class FuncTy>
-void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
-  concurrency::parallel_for(Begin, End, Fn);
-}
-#else
-template <class IterTy, class FuncTy>
-void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
-  // TaskGroup has a relatively high overhead, so we want to reduce
-  // the number of spawn() calls. We'll create up to 1024 tasks here.
-  // (Note that 1024 is an arbitrary number. This code probably needs
-  // improving to take the number of available cores into account.)
-  ptrdiff_t TaskSize = std::distance(Begin, End) / 1024;
-  if (TaskSize == 0)
-    TaskSize = 1;
-
-  TaskGroup TG;
-  while (TaskSize <= std::distance(Begin, End)) {
-    TG.spawn([=, &Fn] { std::for_each(Begin, Begin + TaskSize, Fn); });
-    Begin += TaskSize;
-  }
-  TG.spawn([=, &Fn] { std::for_each(Begin, End, Fn); });
-}
-
-template <class IndexTy, class FuncTy>
-void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
-  ptrdiff_t TaskSize = (End - Begin) / 1024;
-  if (TaskSize == 0)
-    TaskSize = 1;
-
-  TaskGroup TG;
-  IndexTy I = Begin;
-  for (; I + TaskSize < End; I += TaskSize) {
-    TG.spawn([=, &Fn] {
-      for (IndexTy J = I, E = I + TaskSize; J != E; ++J)
-        Fn(J);
-    });
-  }
-  TG.spawn([=, &Fn] {
-    for (IndexTy J = I; J < End; ++J)
-      Fn(J);
-  });
-}
-#endif
-} // End namespace lld
-
-#endif // LLD_CORE_PARALLEL_H
diff --git a/include/lld/Core/TaskGroup.h b/include/lld/Core/TaskGroup.h
deleted file mode 100644
index 82e9122f4ae2..000000000000
--- a/include/lld/Core/TaskGroup.h
+++ /dev/null
@@ -1,65 +0,0 @@
-//===- lld/Core/TaskGroup.h - Task Group ----------------------------------===//
-//
-//                             The LLVM Linker
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLD_CORE_TASKGROUP_H
-#define LLD_CORE_TASKGROUP_H
-
-#include "lld/Core/LLVM.h"
-
-#include <condition_variable>
-#include <functional>
-#include <mutex>
-
-namespace lld {
-/// \brief Allows one or more threads to wait on a potentially unknown number of
-///   events.
-///
-/// A latch starts at \p count. inc() increments this, and dec() decrements it.
-/// All calls to sync() will block while the count is not 0.
-///
-/// Calling dec() on a Latch with a count of 0 has undefined behaivor.
-class Latch {
-  uint32_t _count;
-  mutable std::mutex _condMut;
-  mutable std::condition_variable _cond;
-
-public:
-  explicit Latch(uint32_t count = 0) : _count(count) {}
-  ~Latch() { sync(); }
-
-  void inc() {
-    std::unique_lock<std::mutex> lock(_condMut);
-    ++_count;
-  }
-
-  void dec() {
-    std::unique_lock<std::mutex> lock(_condMut);
-    if (--_count == 0)
-      _cond.notify_all();
-  }
-
-  void sync() const {
-    std::unique_lock<std::mutex> lock(_condMut);
-    _cond.wait(lock, [&] { return _count == 0; });
-  }
-};
-
-/// \brief Allows launching a number of tasks and waiting for them to finish
-///   either explicitly via sync() or implicitly on destruction.
-class TaskGroup {
-  Latch _latch;
-
-public:
-  void spawn(std::function<void()> f);
-
-  void sync() const { _latch.sync(); }
-};
-}
-
-#endif
diff --git a/lib/Core/CMakeLists.txt b/lib/Core/CMakeLists.txt
index cdd4e679ffa2..f2bf90509295 100644
--- a/lib/Core/CMakeLists.txt
+++ b/lib/Core/CMakeLists.txt
@@ -12,7 +12,6 @@ add_lld_library(lldCore
   Resolver.cpp
   SymbolTable.cpp
   TargetOptionsCommandFlags.cpp
-  TaskGroup.cpp
   Writer.cpp
 
   ADDITIONAL_HEADER_DIRS
diff --git a/lib/Core/TaskGroup.cpp b/lib/Core/TaskGroup.cpp
deleted file mode 100644
index d4de48ce3dc4..000000000000
--- a/lib/Core/TaskGroup.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-//===- lld/Core/TaskGroup.cpp - Task Group --------------------------------===//
-//
-//                             The LLVM Linker
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "lld/Core/TaskGroup.h"
-#include "llvm/Config/llvm-config.h"
-
-#include <atomic>
-#include <stack>
-#include <thread>
-
-#if defined(_MSC_VER) && LLVM_ENABLE_THREADS
-#include <concrt.h>
-#include <ppl.h>
-#endif
-
-using namespace lld;
-
-namespace {
-
-/// \brief An abstract class that takes closures and runs them asynchronously.
-class Executor {
-public:
-  virtual ~Executor() = default;
-  virtual void add(std::function<void()> func) = 0;
-
-  static Executor *getDefaultExecutor();
-};
-
-#if !LLVM_ENABLE_THREADS
-class SyncExecutor : public Executor {
-public:
-  virtual void add(std::function<void()> F) { F(); }
-};
-
-Executor *Executor::getDefaultExecutor() {
-  static SyncExecutor Exec;
-  return &Exec;
-}
-
-#elif defined(_MSC_VER)
-/// \brief An Executor that runs tasks via ConcRT.
-class ConcRTExecutor : public Executor {
-  struct Taskish {
-    Taskish(std::function<void()> Task) : Task(Task) {}
-
-    std::function<void()> Task;
-
-    static void run(void *P) {
-      Taskish *Self = static_cast<Taskish *>(P);
-      Self->Task();
-      concurrency::Free(Self);
-    }
-  };
-
-public:
-  virtual void add(std::function<void()> F) {
-    Concurrency::CurrentScheduler::ScheduleTask(
-        Taskish::run, new (concurrency::Alloc(sizeof(Taskish))) Taskish(F));
-  }
-};
-
-Executor *Executor::getDefaultExecutor() {
-  static ConcRTExecutor exec;
-  return &exec;
-}
-
-#else
-/// \brief An implementation of an Executor that runs closures on a thread pool
-///   in filo order.
-class ThreadPoolExecutor : public Executor {
-public:
-  explicit ThreadPoolExecutor(
-      unsigned ThreadCount = std::thread::hardware_concurrency())
-      : Done(ThreadCount) {
-    // Spawn all but one of the threads in another thread as spawning threads
-    // can take a while.
-    std::thread([&, ThreadCount] {
-      for (size_t i = 1; i < ThreadCount; ++i) {
-        std::thread([=] { work(); }).detach();
-      }
-      work();
-    }).detach();
-  }
-
-  ~ThreadPoolExecutor() override {
-    std::unique_lock<std::mutex> Lock(Mutex);
-    Stop = true;
-    Lock.unlock();
-    Cond.notify_all();
-    // Wait for ~Latch.
-  }
-
-  void add(std::function<void()> F) override {
-    std::unique_lock<std::mutex> Lock(Mutex);
-    WorkStack.push(F);
-    Lock.unlock();
-    Cond.notify_one();
-  }
-
-private:
-  void work() {
-    while (true) {
-      std::unique_lock<std::mutex> Lock(Mutex);
-      Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
-      if (Stop)
-        break;
-      auto Task = WorkStack.top();
-      WorkStack.pop();
-      Lock.unlock();
-      Task();
-    }
-    Done.dec();
-  }
-
-  std::atomic<bool> Stop{false};
-  std::stack<std::function<void()>> WorkStack;
-  std::mutex Mutex;
-  std::condition_variable Cond;
-  Latch Done;
-};
-
-Executor *Executor::getDefaultExecutor() {
-  static ThreadPoolExecutor exec;
-  return &exec;
-}
-#endif
-}
-
-void TaskGroup::spawn(std::function<void()> f) {
-  _latch.inc();
-  Executor::getDefaultExecutor()->add([&, f] {
-    f();
-    _latch.dec();
-  });
-}
diff --git a/lib/ReaderWriter/MachO/LayoutPass.cpp b/lib/ReaderWriter/MachO/LayoutPass.cpp
index 24dbf79d3e3b..7bca07eb16d6 100644
--- a/lib/ReaderWriter/MachO/LayoutPass.cpp
+++ b/lib/ReaderWriter/MachO/LayoutPass.cpp
@@ -9,12 +9,12 @@
 
 #include "LayoutPass.h"
 #include "lld/Core/Instrumentation.h"
-#include "lld/Core/Parallel.h"
 #include "lld/Core/PassManager.h"
 #include "lld/ReaderWriter/MachOLinkingContext.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Parallel.h"
 #include <algorithm>
 #include <set>
 #include <utility>
@@ -461,10 +461,10 @@ llvm::Error LayoutPass::perform(SimpleFile &mergedFile) {
   });
 
   std::vector<LayoutPass::SortKey> vec = decorate(atomRange);
-  parallel_sort(vec.begin(), vec.end(),
-      [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool {
-        return compareAtoms(l, r, _customSorter);
-      });
+  sort(llvm::parallel::par, vec.begin(), vec.end(),
+       [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool {
+         return compareAtoms(l, r, _customSorter);
+       });
   DEBUG(checkTransitivity(vec, _customSorter));
   undecorate(atomRange, vec);
 
diff --git a/test/COFF/constant-export.test b/test/COFF/constant-export.test
index 18b1f5e30d29..80597660ce19 100644
--- a/test/COFF/constant-export.test
+++ b/test/COFF/constant-export.test
@@ -1,5 +1,5 @@
 # RUN: mkdir -p %t
-# RUN: yaml2obj -o %t/constant-export.obj %S/constant-export.yaml
+# RUN: yaml2obj -o %t/constant-export.obj %s
 # RUN: lld-link /machine:x86 /dll /entry:__CFConstantStringClassReference -out:%t/constant-export.dll %t/constant-export.obj
 # RUN: llvm-readobj -coff-exports %t/constant-export.lib | FileCheck %s
 
@@ -7,3 +7,86 @@
 # CHECK: Name type: noprefix
 # CHECK: Symbol: __imp____CFConstantStringClassReference
 
+--- !COFF
+header:
+  Machine:         IMAGE_FILE_MACHINE_I386
+  Characteristics: [  ]
+sections:
+  - Name:            .text
+    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+    Alignment:       4
+    SectionData:     ''
+  - Name:            .data
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ]
+    Alignment:       4
+    SectionData:     ''
+  - Name:            .bss
+    Characteristics: [ IMAGE_SCN_CNT_UNINITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ]
+    Alignment:       4
+    SectionData:     ''
+  - Name:            .drectve
+    Characteristics: [ IMAGE_SCN_LNK_INFO, IMAGE_SCN_LNK_REMOVE ]
+    Alignment:       1
+    SectionData:     20202D6578706F72743A5F5F5F4346436F6E7374616E74537472696E67436C6173735265666572656E63652C434F4E5354414E54
+symbols:
+  - Name:            .text
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          0
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          1
+  - Name:            .data
+    Value:           0
+    SectionNumber:   2
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          0
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          2
+  - Name:            .bss
+    Value:           0
+    SectionNumber:   3
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          0
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          3
+  - Name:            .drectve
+    Value:           0
+    SectionNumber:   4
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          52
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        1983959296
+      Number:          4
+  - Name:            '@feat.00'
+    Value:           1
+    SectionNumber:   -1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+  - Name:            ___CFConstantStringClassReference
+    Value:           128
+    SectionNumber:   0
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+...
diff --git a/test/COFF/constant-export.yaml b/test/COFF/constant-export.yaml
deleted file mode 100644
index 7e44bb70c9d7..000000000000
--- a/test/COFF/constant-export.yaml
+++ /dev/null
@@ -1,83 +0,0 @@
---- !COFF
-header:
-  Machine:         IMAGE_FILE_MACHINE_I386
-  Characteristics: [  ]
-sections:
-  - Name:            .text
-    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
-    Alignment:       4
-    SectionData:     ''
-  - Name:            .data
-    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ]
-    Alignment:       4
-    SectionData:     ''
-  - Name:            .bss
-    Characteristics: [ IMAGE_SCN_CNT_UNINITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ]
-    Alignment:       4
-    SectionData:     ''
-  - Name:            .drectve
-    Characteristics: [ IMAGE_SCN_LNK_INFO, IMAGE_SCN_LNK_REMOVE ]
-    Alignment:       1
-    SectionData:     20202D6578706F72743A5F5F5F4346436F6E7374616E74537472696E67436C6173735265666572656E63652C434F4E5354414E54
-symbols:
-  - Name:            .text
-    Value:           0
-    SectionNumber:   1
-    SimpleType:      IMAGE_SYM_TYPE_NULL
-    ComplexType:     IMAGE_SYM_DTYPE_NULL
-    StorageClass:    IMAGE_SYM_CLASS_STATIC
-    SectionDefinition:
-      Length:          0
-      NumberOfRelocations: 0
-      NumberOfLinenumbers: 0
-      CheckSum:        0
-      Number:          1
-  - Name:            .data
-    Value:           0
-    SectionNumber:   2
-    SimpleType:      IMAGE_SYM_TYPE_NULL
-    ComplexType:     IMAGE_SYM_DTYPE_NULL
-    StorageClass:    IMAGE_SYM_CLASS_STATIC
-    SectionDefinition:
-      Length:          0
-      NumberOfRelocations: 0
-      NumberOfLinenumbers: 0
-      CheckSum:        0
-      Number:          2
-  - Name:            .bss
-    Value:           0
-    SectionNumber:   3
-    SimpleType:      IMAGE_SYM_TYPE_NULL
-    ComplexType:     IMAGE_SYM_DTYPE_NULL
-    StorageClass:    IMAGE_SYM_CLASS_STATIC
-    SectionDefinition:
-      Length:          0
-      NumberOfRelocations: 0
-      NumberOfLinenumbers: 0
-      CheckSum:        0
-      Number:          3
-  - Name:            .drectve
-    Value:           0
-    SectionNumber:   4
-    SimpleType:      IMAGE_SYM_TYPE_NULL
-    ComplexType:     IMAGE_SYM_DTYPE_NULL
-    StorageClass:    IMAGE_SYM_CLASS_STATIC
-    SectionDefinition:
-      Length:          52
-      NumberOfRelocations: 0
-      NumberOfLinenumbers: 0
-      CheckSum:        1983959296
-      Number:          4
-  - Name:            '@feat.00'
-    Value:           1
-    SectionNumber:   -1
-    SimpleType:      IMAGE_SYM_TYPE_NULL
-    ComplexType:     IMAGE_SYM_DTYPE_NULL
-    StorageClass:    IMAGE_SYM_CLASS_STATIC
-  - Name:            ___CFConstantStringClassReference
-    Value:           128
-    SectionNumber:   0
-    SimpleType:      IMAGE_SYM_TYPE_NULL
-    ComplexType:     IMAGE_SYM_DTYPE_NULL
-    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
-...
diff --git a/test/ELF/Inputs/i386-static-tls-model1.s b/test/ELF/Inputs/i386-static-tls-model1.s
deleted file mode 100644
index e7e584c1fcf1..000000000000
--- a/test/ELF/Inputs/i386-static-tls-model1.s
+++ /dev/null
@@ -1,10 +0,0 @@
-.section ".tdata", "awT", @progbits
-.globl var
-var:
-
-.section .foo, "aw"
-.global _start
-_start:
- movl $var@tpoff, %edx # R_386_TLS_LE_32
- movl %gs:0, %ecx
- subl %edx, %eax
diff --git a/test/ELF/Inputs/i386-static-tls-model2.s b/test/ELF/Inputs/i386-static-tls-model2.s
deleted file mode 100644
index b28a1458742d..000000000000
--- a/test/ELF/Inputs/i386-static-tls-model2.s
+++ /dev/null
@@ -1,9 +0,0 @@
-.section ".tdata", "awT", @progbits
-.globl var
-var:
-
-.section .foo, "aw"
-.global _start
-_start: 
- movl %gs:0, %eax
- addl var@gotntpoff(%ebx),%eax # R_386_TLS_GOTIE
diff --git a/test/ELF/Inputs/i386-static-tls-model3.s b/test/ELF/Inputs/i386-static-tls-model3.s
deleted file mode 100644
index f92267ecbdd0..000000000000
--- a/test/ELF/Inputs/i386-static-tls-model3.s
+++ /dev/null
@@ -1,9 +0,0 @@
-.section ".tdata", "awT", @progbits
-.globl var
-var:
-
-.section .foo, "aw"
-.global _start
-_start:
- movl %gs:0, %eax
- addl var@indntpoff, %eax #R_386_TLS_IE
diff --git a/test/ELF/Inputs/i386-static-tls-model4.s b/test/ELF/Inputs/i386-static-tls-model4.s
deleted file mode 100644
index ffb20def4fab..000000000000
--- a/test/ELF/Inputs/i386-static-tls-model4.s
+++ /dev/null
@@ -1,9 +0,0 @@
-.section ".tdata", "awT", @progbits
-.globl var
-var:
-
-.section .foo, "aw"
-.global _start
-_start:
- movl %gs:0, %eax
- leal var@ntpoff(%eax), %eax #R_386_TLS_LE
diff --git a/test/ELF/gdb-index-empty.s b/test/ELF/gdb-index-empty.s
new file mode 100644
index 000000000000..933afed33e2f
--- /dev/null
+++ b/test/ELF/gdb-index-empty.s
@@ -0,0 +1,116 @@
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux -o %t %s
+# RUN: ld.lld --gdb-index --gc-sections -o %t2 %t
+# RUN: llvm-dwarfdump -debug-dump=gdb_index %t2 | FileCheck %s
+
+# CHECK: Address area offset = 0x28, has 0 entries:
+
+# Generated with: (clang r302976)
+# echo "void _start() { __builtin_unreachable(); }" | \
+# clang -Os -g -S -o gdb-index-empty.s -x c - -Xclang -fdebug-compilation-dir -Xclang .
+
+	.text
+	.file	"-"
+	.globl	_start
+	.type	_start,@function
+_start:                                 # @_start
+.Lfunc_begin0:
+	.cfi_startproc
+# BB#0:                                 # %entry
+.Lfunc_end0:
+	.size	_start, .Lfunc_end0-_start
+	.cfi_endproc
+
+	.file	1 "<stdin>"
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string0:
+	.asciz	"clang version 5.0.0 "  # string offset=0
+.Linfo_string1:
+	.asciz	"-"                     # string offset=21
+.Linfo_string2:
+	.asciz	"."                     # string offset=23
+.Linfo_string3:
+	.asciz	"_start"                # string offset=25
+	.section	.debug_loc,"",@progbits
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                       # Abbreviation Code
+	.byte	17                      # DW_TAG_compile_unit
+	.byte	1                       # DW_CHILDREN_yes
+	.byte	37                      # DW_AT_producer
+	.byte	14                      # DW_FORM_strp
+	.byte	19                      # DW_AT_language
+	.byte	5                       # DW_FORM_data2
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	16                      # DW_AT_stmt_list
+	.byte	23                      # DW_FORM_sec_offset
+	.byte	27                      # DW_AT_comp_dir
+	.byte	14                      # DW_FORM_strp
+	.byte	17                      # DW_AT_low_pc
+	.byte	1                       # DW_FORM_addr
+	.byte	18                      # DW_AT_high_pc
+	.byte	6                       # DW_FORM_data4
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	2                       # Abbreviation Code
+	.byte	46                      # DW_TAG_subprogram
+	.byte	0                       # DW_CHILDREN_no
+	.byte	17                      # DW_AT_low_pc
+	.byte	1                       # DW_FORM_addr
+	.byte	18                      # DW_AT_high_pc
+	.byte	6                       # DW_FORM_data4
+	.byte	64                      # DW_AT_frame_base
+	.byte	24                      # DW_FORM_exprloc
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	58                      # DW_AT_decl_file
+	.byte	11                      # DW_FORM_data1
+	.byte	59                      # DW_AT_decl_line
+	.byte	11                      # DW_FORM_data1
+	.byte	63                      # DW_AT_external
+	.byte	25                      # DW_FORM_flag_present
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	0                       # EOM(3)
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.long	60                      # Length of Unit
+	.short	4                       # DWARF version number
+	.long	.debug_abbrev           # Offset Into Abbrev. Section
+	.byte	8                       # Address Size (in bytes)
+	.byte	1                       # Abbrev [1] 0xb:0x35 DW_TAG_compile_unit
+	.long	.Linfo_string0          # DW_AT_producer
+	.short	12                      # DW_AT_language
+	.long	.Linfo_string1          # DW_AT_name
+	.long	.Lline_table_start0     # DW_AT_stmt_list
+	.long	.Linfo_string2          # DW_AT_comp_dir
+	.quad	.Lfunc_begin0           # DW_AT_low_pc
+	.long	.Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+	.byte	2                       # Abbrev [2] 0x2a:0x15 DW_TAG_subprogram
+	.quad	.Lfunc_begin0           # DW_AT_low_pc
+	.long	.Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+	.byte	1                       # DW_AT_frame_base
+	.byte	87
+	.long	.Linfo_string3          # DW_AT_name
+	.byte	1                       # DW_AT_decl_file
+	.byte	1                       # DW_AT_decl_line
+                                        # DW_AT_external
+	.byte	0                       # End Of Children Mark
+	.section	.debug_ranges,"",@progbits
+	.section	.debug_macinfo,"",@progbits
+.Lcu_macro_begin0:
+	.byte	0                       # End Of Macro List Mark
+	.section	.debug_pubnames,"",@progbits
+	.long	.LpubNames_end0-.LpubNames_begin0 # Length of Public Names Info
+.LpubNames_begin0:
+	.short	2                       # DWARF Version
+	.long	.Lcu_begin0             # Offset of Compilation Unit Info
+	.long	64                      # Compilation Unit Length
+	.long	42                      # DIE offset
+	.asciz	"_start"                # External Name
+	.long	0                       # End Mark
+.LpubNames_end0:
+
+	.ident	"clang version 5.0.0 "
+	.section	".note.GNU-stack","",@progbits
+	.section	.debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/test/ELF/gdb-index-gc-sections.s b/test/ELF/gdb-index-gc-sections.s
new file mode 100644
index 000000000000..70a14754656c
--- /dev/null
+++ b/test/ELF/gdb-index-gc-sections.s
@@ -0,0 +1,157 @@
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux -o %t %s
+# RUN: ld.lld --gdb-index --gc-sections -o %t2 %t
+# RUN: llvm-dwarfdump -debug-dump=gdb_index %t2 | FileCheck %s
+
+# CHECK: Address area offset = 0x28, has 1 entries:
+# CHECK-NEXT:    Low/High address = [0x201000, 0x201001) (Size: 0x1), CU id = 0
+
+# Generated with: (clang r302976)
+# echo "void _start() {} void dead() {}" | \
+# clang -Os -g -S -ffunction-sections -o gdb-index-gc-sections.s -x c - -Xclang -fdebug-compilation-dir -Xclang .
+
+	.text
+	.file	"-"
+	.section	.text._start,"ax",@progbits
+	.globl	_start
+	.type	_start,@function
+_start:                                 # @_start
+.Lfunc_begin0:
+	.file	1 "<stdin>"
+	.loc	1 1 0                   # <stdin>:1:0
+	.cfi_startproc
+# BB#0:                                 # %entry
+	.loc	1 1 16 prologue_end     # <stdin>:1:16
+	retq
+.Ltmp0:
+.Lfunc_end0:
+	.size	_start, .Lfunc_end0-_start
+	.cfi_endproc
+
+	.section	.text.dead,"ax",@progbits
+	.globl	dead
+	.type	dead,@function
+dead:                                   # @dead
+.Lfunc_begin1:
+	.loc	1 1 0                   # <stdin>:1:0
+	.cfi_startproc
+# BB#0:                                 # %entry
+	.loc	1 1 31 prologue_end     # <stdin>:1:31
+	retq
+.Ltmp1:
+.Lfunc_end1:
+	.size	dead, .Lfunc_end1-dead
+	.cfi_endproc
+
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string0:
+	.asciz	"clang version 5.0.0 "  # string offset=0
+.Linfo_string1:
+	.asciz	"-"                     # string offset=21
+.Linfo_string2:
+	.asciz	"."                     # string offset=23
+.Linfo_string3:
+	.asciz	"_start"                # string offset=25
+.Linfo_string4:
+	.asciz	"dead"                  # string offset=32
+	.section	.debug_loc,"",@progbits
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                       # Abbreviation Code
+	.byte	17                      # DW_TAG_compile_unit
+	.byte	1                       # DW_CHILDREN_yes
+	.byte	37                      # DW_AT_producer
+	.byte	14                      # DW_FORM_strp
+	.byte	19                      # DW_AT_language
+	.byte	5                       # DW_FORM_data2
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	16                      # DW_AT_stmt_list
+	.byte	23                      # DW_FORM_sec_offset
+	.byte	27                      # DW_AT_comp_dir
+	.byte	14                      # DW_FORM_strp
+	.byte	17                      # DW_AT_low_pc
+	.byte	1                       # DW_FORM_addr
+	.byte	85                      # DW_AT_ranges
+	.byte	23                      # DW_FORM_sec_offset
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	2                       # Abbreviation Code
+	.byte	46                      # DW_TAG_subprogram
+	.byte	0                       # DW_CHILDREN_no
+	.byte	17                      # DW_AT_low_pc
+	.byte	1                       # DW_FORM_addr
+	.byte	18                      # DW_AT_high_pc
+	.byte	6                       # DW_FORM_data4
+	.byte	64                      # DW_AT_frame_base
+	.byte	24                      # DW_FORM_exprloc
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	58                      # DW_AT_decl_file
+	.byte	11                      # DW_FORM_data1
+	.byte	59                      # DW_AT_decl_line
+	.byte	11                      # DW_FORM_data1
+	.byte	63                      # DW_AT_external
+	.byte	25                      # DW_FORM_flag_present
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	0                       # EOM(3)
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.long	81                      # Length of Unit
+	.short	4                       # DWARF version number
+	.long	.debug_abbrev           # Offset Into Abbrev. Section
+	.byte	8                       # Address Size (in bytes)
+	.byte	1                       # Abbrev [1] 0xb:0x4a DW_TAG_compile_unit
+	.long	.Linfo_string0          # DW_AT_producer
+	.short	12                      # DW_AT_language
+	.long	.Linfo_string1          # DW_AT_name
+	.long	.Lline_table_start0     # DW_AT_stmt_list
+	.long	.Linfo_string2          # DW_AT_comp_dir
+	.quad	0                       # DW_AT_low_pc
+	.long	.Ldebug_ranges0         # DW_AT_ranges
+	.byte	2                       # Abbrev [2] 0x2a:0x15 DW_TAG_subprogram
+	.quad	.Lfunc_begin0           # DW_AT_low_pc
+	.long	.Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+	.byte	1                       # DW_AT_frame_base
+	.byte	87
+	.long	.Linfo_string3          # DW_AT_name
+	.byte	1                       # DW_AT_decl_file
+	.byte	1                       # DW_AT_decl_line
+                                        # DW_AT_external
+	.byte	2                       # Abbrev [2] 0x3f:0x15 DW_TAG_subprogram
+	.quad	.Lfunc_begin1           # DW_AT_low_pc
+	.long	.Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc
+	.byte	1                       # DW_AT_frame_base
+	.byte	87
+	.long	.Linfo_string4          # DW_AT_name
+	.byte	1                       # DW_AT_decl_file
+	.byte	1                       # DW_AT_decl_line
+                                        # DW_AT_external
+	.byte	0                       # End Of Children Mark
+	.section	.debug_ranges,"",@progbits
+.Ldebug_ranges0:
+	.quad	.Lfunc_begin0
+	.quad	.Lfunc_end0
+	.quad	.Lfunc_begin1
+	.quad	.Lfunc_end1
+	.quad	0
+	.quad	0
+	.section	.debug_macinfo,"",@progbits
+.Lcu_macro_begin0:
+	.byte	0                       # End Of Macro List Mark
+	.section	.debug_pubnames,"",@progbits
+	.long	.LpubNames_end0-.LpubNames_begin0 # Length of Public Names Info
+.LpubNames_begin0:
+	.short	2                       # DWARF Version
+	.long	.Lcu_begin0             # Offset of Compilation Unit Info
+	.long	85                      # Compilation Unit Length
+	.long	42                      # DIE offset
+	.asciz	"_start"                # External Name
+	.long	63                      # DIE offset
+	.asciz	"dead"                  # External Name
+	.long	0                       # End Mark
+.LpubNames_end0:
+
+	.ident	"clang version 5.0.0 "
+	.section	".note.GNU-stack","",@progbits
+	.section	.debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/test/ELF/i386-static-tls-model.s b/test/ELF/i386-static-tls-model.s
deleted file mode 100644
index b2799c4c722f..000000000000
--- a/test/ELF/i386-static-tls-model.s
+++ /dev/null
@@ -1,20 +0,0 @@
-# REQUIRES: x86
-
-# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %S/Inputs/i386-static-tls-model1.s -o %t.o
-# RUN: ld.lld %t.o -o %t1 -shared
-# RUN: llvm-readobj  -dynamic-table %t1 | FileCheck %s
-
-# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %S/Inputs/i386-static-tls-model2.s -o %t.o
-# RUN: ld.lld %t.o -o %t2 -shared
-# RUN: llvm-readobj  -dynamic-table %t2 | FileCheck %s
-
-# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %S/Inputs/i386-static-tls-model3.s -o %t.o
-# RUN: ld.lld %t.o -o %t3 -shared
-# RUN: llvm-readobj  -dynamic-table %t3 | FileCheck %s
-
-# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux %S/Inputs/i386-static-tls-model4.s -o %t.o
-# RUN: ld.lld %t.o -o %t4 -shared
-# RUN: llvm-readobj  -dynamic-table %t4 | FileCheck %s
-
-# CHECK: DynamicSection [
-# CHECK: FLAGS STATIC_TLS
diff --git a/test/ELF/i386-tls-ie-shared.s b/test/ELF/i386-tls-ie-shared.s
index c6dccf84a216..8becc3199f95 100644
--- a/test/ELF/i386-tls-ie-shared.s
+++ b/test/ELF/i386-tls-ie-shared.s
@@ -13,8 +13,8 @@
 // GOTRELSHARED-NEXT:     SHF_ALLOC
 // GOTRELSHARED-NEXT:     SHF_WRITE
 // GOTRELSHARED-NEXT:   ]
-// GOTRELSHARED-NEXT:   Address: 0x1060
-// GOTRELSHARED-NEXT:   Offset: 0x1060
+// GOTRELSHARED-NEXT:   Address: 0x1058
+// GOTRELSHARED-NEXT:   Offset: 0x1058
 // GOTRELSHARED-NEXT:   Size: 16
 // GOTRELSHARED-NEXT:   Link: 0
 // GOTRELSHARED-NEXT:   Info: 0
@@ -31,36 +31,36 @@
 // GOTRELSHARED-NEXT:     0x202D R_386_RELATIVE - 0x0
 // GOTRELSHARED-NEXT:     0x2036 R_386_RELATIVE - 0x0
 // GOTRELSHARED-NEXT:     0x203F R_386_RELATIVE - 0x0
-// GOTRELSHARED-NEXT:     0x1060 R_386_TLS_TPOFF tlslocal0 0x0
-// GOTRELSHARED-NEXT:     0x1064 R_386_TLS_TPOFF tlslocal1 0x0
-// GOTRELSHARED-NEXT:     0x1068 R_386_TLS_TPOFF tlsshared0 0x0
-// GOTRELSHARED-NEXT:     0x106C R_386_TLS_TPOFF tlsshared1 0x0
+// GOTRELSHARED-NEXT:     0x1058 R_386_TLS_TPOFF tlslocal0 0x0
+// GOTRELSHARED-NEXT:     0x105C R_386_TLS_TPOFF tlslocal1 0x0
+// GOTRELSHARED-NEXT:     0x1060 R_386_TLS_TPOFF tlsshared0 0x0
+// GOTRELSHARED-NEXT:     0x1064 R_386_TLS_TPOFF tlsshared1 0x0
 // GOTRELSHARED-NEXT:   }
 // GOTRELSHARED-NEXT: ]
 // GOTRELSHARED:      0x6FFFFFFA RELCOUNT             8
 
 // DISASMSHARED:       Disassembly of section test:
 // DISASMSHARED-NEXT:  _start:
-// (.got)[0] = 0x1060 = 4192
-// (.got)[1] = 0x1064 = 4196
-// (.got)[2] = 0x1068 = 4200
-// (.got)[3] = 0x106C = 4204
-// DISASMSHARED-NEXT:  2000: {{.*}}  movl  4192, %ecx
-// DISASMSHARED-NEXT:  2006: {{.*}}  movl  %gs:(%ecx), %eax
-// DISASMSHARED-NEXT:  2009: {{.*}}  movl  4192, %eax
-// DISASMSHARED-NEXT:  200e: {{.*}}  movl  %gs:(%eax), %eax
-// DISASMSHARED-NEXT:  2011: {{.*}}  addl  4192, %ecx
-// DISASMSHARED-NEXT:  2017: {{.*}}  movl  %gs:(%ecx), %eax
-// DISASMSHARED-NEXT:  201a: {{.*}}  movl  4196, %ecx
-// DISASMSHARED-NEXT:  2020: {{.*}}  movl  %gs:(%ecx), %eax
-// DISASMSHARED-NEXT:  2023: {{.*}}  movl  4196, %eax
-// DISASMSHARED-NEXT:  2028: {{.*}}  movl  %gs:(%eax), %eax
-// DISASMSHARED-NEXT:  202b: {{.*}}  addl  4196, %ecx
-// DISASMSHARED-NEXT:  2031: {{.*}}  movl  %gs:(%ecx), %eax
-// DISASMSHARED-NEXT:  2034: {{.*}}  movl  4200, %ecx
-// DISASMSHARED-NEXT:  203a: {{.*}}  movl  %gs:(%ecx), %eax
-// DISASMSHARED-NEXT:  203d: {{.*}}  addl  4204, %ecx
-// DISASMSHARED-NEXT:  2043: {{.*}}  movl  %gs:(%ecx), %eax
+// (.got)[0] = 0x1058 = 4184
+// (.got)[1] = 0x105C = 4188
+// (.got)[2] = 0x1060 = 4192
+// (.got)[3] = 0x1064 = 4196
+// DISASMSHARED-NEXT:  2000: 8b 0d 58 10 00 00   movl  4184, %ecx
+// DISASMSHARED-NEXT:  2006: 65 8b 01  movl  %gs:(%ecx), %eax
+// DISASMSHARED-NEXT:  2009: a1 58 10 00 00  movl  4184, %eax
+// DISASMSHARED-NEXT:  200e: 65 8b 00  movl  %gs:(%eax), %eax
+// DISASMSHARED-NEXT:  2011: 03 0d 58 10 00 00   addl  4184, %ecx
+// DISASMSHARED-NEXT:  2017: 65 8b 01  movl  %gs:(%ecx), %eax
+// DISASMSHARED-NEXT:  201a: 8b 0d 5c 10 00 00   movl  4188, %ecx
+// DISASMSHARED-NEXT:  2020: 65 8b 01  movl  %gs:(%ecx), %eax
+// DISASMSHARED-NEXT:  2023: a1 5c 10 00 00  movl  4188, %eax
+// DISASMSHARED-NEXT:  2028: 65 8b 00  movl  %gs:(%eax), %eax
+// DISASMSHARED-NEXT:  202b: 03 0d 5c 10 00 00   addl  4188, %ecx
+// DISASMSHARED-NEXT:  2031: 65 8b 01  movl  %gs:(%ecx), %eax
+// DISASMSHARED-NEXT:  2034: 8b 0d 60 10 00 00   movl  4192, %ecx
+// DISASMSHARED-NEXT:  203a: 65 8b 01  movl  %gs:(%ecx), %eax
+// DISASMSHARED-NEXT:  203d: 03 0d 64 10 00 00   addl  4196, %ecx
+// DISASMSHARED-NEXT:  2043: 65 8b 01  movl  %gs:(%ecx), %eax
 
 .type tlslocal0,@object
 .section .tbss,"awT",@nobits
diff --git a/test/ELF/incompatible-section-types2.s b/test/ELF/incompatible-section-types2.s
index 2cf9b8548aa1..146e680ab271 100644
--- a/test/ELF/incompatible-section-types2.s
+++ b/test/ELF/incompatible-section-types2.s
@@ -1,7 +1,9 @@
 // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 // RUN: not ld.lld %t.o -o %t 2>&1 | FileCheck %s
 
-// CHECK: error: Section has different type from others with the same name <internal>:(.shstrtab)
+// CHECK:      error: section type mismatch for .shstrtab
+// CHECK-NEXT: >>> <internal>:(.shstrtab): SHT_STRTAB
+// CHECK-NEXT: >>> output section .shstrtab: Unknown
 
-.section .shstrtab,""
+.section .shstrtab,"",@12345
 .short 20
diff --git a/test/ELF/linkerscript/early-assign-symbol.s b/test/ELF/linkerscript/early-assign-symbol.s
new file mode 100644
index 000000000000..21940c088393
--- /dev/null
+++ b/test/ELF/linkerscript/early-assign-symbol.s
@@ -0,0 +1,14 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
+
+# RUN: echo "SECTIONS { aaa = 1 + ABSOLUTE(foo - 1); .text  : { *(.text*) } }" > %t1.script
+# RUN: not ld.lld -o %t --script %t1.script %t.o 2>&1 | FileCheck %s
+
+# RUN: echo "SECTIONS { aaa = ABSOLUTE(foo - 1) + 1; .text  : { *(.text*) } }" > %t2.script
+# RUN: not ld.lld -o %t --script %t2.script %t.o 2>&1 | FileCheck %s
+
+# CHECK: error: unable to evaluate expression: input section .text has no output section assigned
+
+.section .text
+.globl foo
+foo:
diff --git a/test/ELF/linkerscript/ehdr_start.s b/test/ELF/linkerscript/ehdr_start.s
index 935fa2bf3391..4da158a83956 100644
--- a/test/ELF/linkerscript/ehdr_start.s
+++ b/test/ELF/linkerscript/ehdr_start.s
@@ -2,9 +2,17 @@
 
 # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
 # RUN: echo "SECTIONS { }" > %t.script
-# RUN: not ld.lld %t.o -script %t.script -o %t 2>&1 | FileCheck %s
-# CHECK: error: undefined symbol: __ehdr_start
-# CHECK: >>> referenced by {{.*}}:(.text+0x0)
+# RUN: ld.lld %t.o -script %t.script -o %t
+# RUN: llvm-readobj -symbols %t | FileCheck %s
+# CHECK:    Name: __ehdr_start (1)
+# CHECK-NEXT:    Value: 0x0
+# CHECK-NEXT:    Size: 0
+# CHECK-NEXT:    Binding: Local (0x0)
+# CHECK-NEXT:    Type: None (0x0)
+# CHECK-NEXT:    Other [ (0x2)
+# CHECK-NEXT:      STV_HIDDEN (0x2)
+# CHECK-NEXT:    ]
+# CHECK-NEXT:    Section: .text (0x1)
 
 .text
 .global _start, __ehdr_start
diff --git a/test/ELF/linkerscript/sections-constraint.s b/test/ELF/linkerscript/sections-constraint.s
index 4d95ec18336c..796240627170 100644
--- a/test/ELF/linkerscript/sections-constraint.s
+++ b/test/ELF/linkerscript/sections-constraint.s
@@ -24,8 +24,8 @@
 # NO1-NEXT: 0               00000000
 # NO1:  .writable     00000004
 # NO1:  .foo.2        00000004
-# NO1:  .readable     00000004
 # NO1:  .foo.1        00000004
+# NO1:  .readable     00000004
 
 .global _start
 _start:
diff --git a/test/ELF/linkerscript/sections.s b/test/ELF/linkerscript/sections.s
index 69c6f19d078d..d5645c303754 100644
--- a/test/ELF/linkerscript/sections.s
+++ b/test/ELF/linkerscript/sections.s
@@ -45,8 +45,9 @@
 # SEC-ORDER: 3 .shstrtab     0000003b {{[0-9a-f]*}}
 # SEC-ORDER: 4 .symtab       00000030 {{[0-9a-f]*}}
 # SEC-ORDER: 5 .strtab       00000008 {{[0-9a-f]*}}
-# SEC-ORDER: 6 .data         00000020 {{[0-9a-f]*}} DATA
-# SEC-ORDER: 7 .text         0000000e {{[0-9a-f]*}} TEXT DATA
+# SEC-ORDER: 6 .comment      00000008 {{[0-9a-f]*}}
+# SEC-ORDER: 7 .data         00000020 {{[0-9a-f]*}} DATA
+# SEC-ORDER: 8 .text         0000000e {{[0-9a-f]*}} TEXT DATA
 
 # .text and .data have swapped names but proper sizes and types.
 # RUN: echo "SECTIONS { \
diff --git a/test/ELF/linkerscript/symbol-memoryexpr.s b/test/ELF/linkerscript/symbol-memoryexpr.s
new file mode 100644
index 000000000000..9c75274e1644
--- /dev/null
+++ b/test/ELF/linkerscript/symbol-memoryexpr.s
@@ -0,0 +1,33 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
+
+# RUN: echo "MEMORY { \
+# RUN:   ram (rwx)  : ORIGIN = 0x8000, LENGTH = 256K \
+# RUN: } \
+# RUN: SECTIONS { \
+# RUN:         origin = ORIGIN(ram); \
+# RUN:         length = LENGTH(ram); \
+# RUN:         end    = ORIGIN(ram) + LENGTH(ram); \
+# RUN:       }" > %t.script
+# RUN: ld.lld -o %t1 --script %t.script %t
+# RUN: llvm-objdump -t %t1 | FileCheck %s
+
+# CHECK:      SYMBOL TABLE:
+# CHECK-NEXT: 0000000000000000 *UND* 00000000
+# CHECK-NEXT: 0000000000008000 .text 00000000 _start
+# CHECK-NEXT: 0000000000008000 *ABS* 00000000 origin
+# CHECK-NEXT: 0000000000040000 *ABS* 00000000 length
+# CHECK-NEXT: 0000000000048000 *ABS* 00000000 end
+
+# RUN: echo "SECTIONS { \
+# RUN:         no_exist_origin = ORIGIN(ram); \
+# RUN:         no_exist_length = LENGTH(ram); \
+# RUN:       }" > %t2.script
+# RUN: not ld.lld -o %t2 --script %t2.script %t 2>&1 \
+# RUN:  | FileCheck -check-prefix=ERR %s
+# ERR: {{.*}}.script:1: memory region not defined: ram
+
+
+.global _start
+_start:
+ nop
diff --git a/test/ELF/many-alloc-sections.s b/test/ELF/many-alloc-sections.s
new file mode 100644
index 000000000000..441e5ff32d08
--- /dev/null
+++ b/test/ELF/many-alloc-sections.s
@@ -0,0 +1,106 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t.o
+// RUN: echo "SECTIONS { . = SIZEOF_HEADERS; .text : { *(.text) } }" > %t.script
+// FIXME: threads are disabled because the test is too slow with them (PR32942).
+// RUN: ld.lld -T %t.script %t.o -o %t --no-threads
+// RUN: llvm-readobj -t %t | FileCheck %s
+
+// Test that _start is in the correct section.
+// CHECK:      Name: _start
+// CHECK-NEXT: Value: 0x120
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: dm
+
+.macro gen_sections4 x
+        .section a\x,"a"
+        .section b\x,"a"
+        .section c\x,"a"
+        .section d\x,"a"
+.endm
+
+.macro gen_sections8 x
+        gen_sections4 a\x
+        gen_sections4 b\x
+.endm
+
+.macro gen_sections16 x
+        gen_sections8 a\x
+        gen_sections8 b\x
+.endm
+
+.macro gen_sections32 x
+        gen_sections16 a\x
+        gen_sections16 b\x
+.endm
+
+.macro gen_sections64 x
+        gen_sections32 a\x
+        gen_sections32 b\x
+.endm
+
+.macro gen_sections128 x
+        gen_sections64 a\x
+        gen_sections64 b\x
+.endm
+
+.macro gen_sections256 x
+        gen_sections128 a\x
+        gen_sections128 b\x
+.endm
+
+.macro gen_sections512 x
+        gen_sections256 a\x
+        gen_sections256 b\x
+.endm
+
+.macro gen_sections1024 x
+        gen_sections512 a\x
+        gen_sections512 b\x
+.endm
+
+.macro gen_sections2048 x
+        gen_sections1024 a\x
+        gen_sections1024 b\x
+.endm
+
+.macro gen_sections4096 x
+        gen_sections2048 a\x
+        gen_sections2048 b\x
+.endm
+
+.macro gen_sections8192 x
+        gen_sections4096 a\x
+        gen_sections4096 b\x
+.endm
+
+.macro gen_sections16384 x
+        gen_sections8192 a\x
+        gen_sections8192 b\x
+.endm
+
+.macro gen_sections32768 x
+        gen_sections16384 a\x
+        gen_sections16384 b\x
+.endm
+
+        .bss
+        .section bar
+
+gen_sections32768 a
+gen_sections16384 b
+gen_sections8192 c
+gen_sections4096 d
+gen_sections2048 e
+gen_sections1024 f
+gen_sections512 g
+gen_sections128 h
+gen_sections64 i
+gen_sections32 j
+gen_sections16 k
+gen_sections8 l
+gen_sections4 m
+
+.global _start
+_start:
diff --git a/test/ELF/many-sections.s b/test/ELF/many-sections.s
index 77e76c20a60d..ae923889ddc1 100644
--- a/test/ELF/many-sections.s
+++ b/test/ELF/many-sections.s
@@ -11,7 +11,14 @@
 // CHECK-NEXT: Section: dm (0xFF00)
 
 
-// RUN: ld.lld %t -o %t2
+// FIXME: threads are disabled because the test is too slow with them (PR32942).
+// RUN: ld.lld %t -o %t2 --no-threads
+// RUN: llvm-readobj -t %t2 | FileCheck --check-prefix=LINKED %s
+
+// Test also with a linker script.
+// RUN: echo "SECTIONS { . = SIZEOF_HEADERS; .text : { *(.text) } }" > %t.script
+// FIXME: threads are disabled because the test is too slow with them (PR32942).
+// RUN: ld.lld -T %t.script %t -o %t2 --no-threads
 // RUN: llvm-readobj -t %t2 | FileCheck --check-prefix=LINKED %s
 
 // Test that _start is in the correct section.
diff --git a/test/ELF/tls-dynamic-i686.s b/test/ELF/tls-dynamic-i686.s
index 04fd13822530..ac88e6eaed31 100644
--- a/test/ELF/tls-dynamic-i686.s
+++ b/test/ELF/tls-dynamic-i686.s
@@ -56,8 +56,8 @@ addl tls1@gotntpoff(%ebx),%eax
 // CHECK-NEXT:   SHF_ALLOC
 // CHECK-NEXT:   SHF_WRITE
 // CHECK-NEXT: ]
-// CHECK-NEXT: Address: 0x3070
-// CHECK-NEXT: Offset: 0x3070
+// CHECK-NEXT: Address: 0x3068
+// CHECK-NEXT: Offset: 0x3068
 // CHECK-NEXT: Size: 32
 // CHECK-NEXT: Link: 0
 // CHECK-NEXT: Info: 0
@@ -66,13 +66,13 @@ addl tls1@gotntpoff(%ebx),%eax
 
 // CHECK: Relocations [
 // CHECK:      Section ({{.+}}) .rel.dyn {
-// CHECK-NEXT: 0x3080 R_386_TLS_DTPMOD32 - 0x0
-// CHECK-NEXT: 0x3070 R_386_TLS_DTPMOD32 tls0 0x0
-// CHECK-NEXT: 0x3074 R_386_TLS_DTPOFF32 tls0 0x0
-// CHECK-NEXT: 0x3088 R_386_TLS_TPOFF tls0 0x0
-// CHECK-NEXT: 0x3078 R_386_TLS_DTPMOD32 tls1 0x0
-// CHECK-NEXT: 0x307C R_386_TLS_DTPOFF32 tls1 0x0
-// CHECK-NEXT: 0x308C R_386_TLS_TPOFF tls1 0x0
+// CHECK-NEXT: 0x3078 R_386_TLS_DTPMOD32 - 0x0
+// CHECK-NEXT: 0x3068 R_386_TLS_DTPMOD32 tls0 0x0
+// CHECK-NEXT: 0x306C R_386_TLS_DTPOFF32 tls0 0x0
+// CHECK-NEXT: 0x3080 R_386_TLS_TPOFF tls0 0x0
+// CHECK-NEXT: 0x3070 R_386_TLS_DTPMOD32 tls1 0x0
+// CHECK-NEXT: 0x3074 R_386_TLS_DTPOFF32 tls1 0x0
+// CHECK-NEXT: 0x3084 R_386_TLS_TPOFF tls1 0x0
 // CHECK-NEXT: }
 
 // DIS:      Disassembly of section .text:
@@ -80,20 +80,20 @@ addl tls1@gotntpoff(%ebx),%eax
 // General dynamic model:
 // -32 and -24 are first and second GOT entries offsets.
 // Each one is a pair of records.
-// DIS-NEXT: 1000: {{.*}} leal -32(,%ebx), %eax
-// DIS-NEXT: 1007: {{.*}} calll 100
-// DIS-NEXT: 100c: {{.*}} leal -24(,%ebx), %eax
-// DIS-NEXT: 1013: {{.*}} calll 88
+// DIS-NEXT: 1000: 8d 04 1d e0 ff ff ff  leal -32(,%ebx), %eax
+// DIS-NEXT: 1007: e8 64 00 00 00        calll 100
+// DIS-NEXT: 100c: 8d 04 1d e8 ff ff ff  leal -24(,%ebx), %eax
+// DIS-NEXT: 1013: e8 58 00 00 00        calll 88
 // Local dynamic model:
 // -16 is a local module tls index offset.
-// DIS-NEXT: 1018: {{.*}} leal -16(%ebx), %eax
-// DIS-NEXT: 101e: {{.*}} calll 77
-// DIS-NEXT: 1023: {{.*}} leal 8(%eax), %edx
-// DIS-NEXT: 1029: {{.*}} leal -16(%ebx), %eax
-// DIS-NEXT: 102f: {{.*}} calll 60
-// DIS-NEXT: 1034: {{.*}} leal 12(%eax), %edx
+// DIS-NEXT: 1018: 8d 83 f0 ff ff ff leal -16(%ebx), %eax
+// DIS-NEXT: 101e: e8 4d 00 00 00    calll 77
+// DIS-NEXT: 1023: 8d 90 08 00 00 00 leal 8(%eax), %edx
+// DIS-NEXT: 1029: 8d 83 f0 ff ff ff leal -16(%ebx), %eax
+// DIS-NEXT: 102f: e8 3c 00 00 00    calll 60
+// DIS-NEXT: 1034: 8d 90 0c 00 00 00 leal 12(%eax), %edx
 // Initial exec model:
-// DIS-NEXT: 103a: {{.*}} movl %gs:0, %eax
-// DIS-NEXT: 1040: {{.*}} addl -8(%ebx), %eax
-// DIS-NEXT: 1046: {{.*}} movl %gs:0, %eax
-// DIS-NEXT: 104c: {{.*}} addl -4(%ebx), %eax
+// DIS-NEXT: 103a: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DIS-NEXT: 1040: 03 83 f8 ff ff ff addl -8(%ebx), %eax
+// DIS-NEXT: 1046: 65 a1 00 00 00 00 movl %gs:0, %eax
+// DIS-NEXT: 104c: 03 83 fc ff ff ff addl -4(%ebx), %eax
diff --git a/test/ELF/tls-opt-iele-i686-nopic.s b/test/ELF/tls-opt-iele-i686-nopic.s
index a883bce511a6..b6608c16551c 100644
--- a/test/ELF/tls-opt-iele-i686-nopic.s
+++ b/test/ELF/tls-opt-iele-i686-nopic.s
@@ -13,8 +13,8 @@
 // GOTREL-NEXT:     SHF_ALLOC
 // GOTREL-NEXT:     SHF_WRITE
 // GOTREL-NEXT:   ]
-// GOTREL-NEXT:   Address: 0x12060
-// GOTREL-NEXT:   Offset: 0x2060
+// GOTREL-NEXT:   Address: 0x12058
+// GOTREL-NEXT:   Offset: 0x2058
 // GOTREL-NEXT:   Size: 8
 // GOTREL-NEXT:   Link: 0
 // GOTREL-NEXT:   Info: 0
@@ -23,8 +23,8 @@
 // GOTREL-NEXT: }
 // GOTREL:      Relocations [
 // GOTREL-NEXT: Section ({{.*}}) .rel.dyn {
-// GOTREL-NEXT:   0x12060 R_386_TLS_TPOFF tlsshared0 0x0
-// GOTREL-NEXT:   0x12064 R_386_TLS_TPOFF tlsshared1 0x0
+// GOTREL-NEXT:   0x12058 R_386_TLS_TPOFF tlsshared0 0x0
+// GOTREL-NEXT:   0x1205C R_386_TLS_TPOFF tlsshared1 0x0
 // GOTREL-NEXT:  }
 // GOTREL-NEXT: ]
 
@@ -32,24 +32,24 @@
 // DISASM-NEXT: _start:
 // 4294967288 = 0xFFFFFFF8
 // 4294967292 = 0xFFFFFFFC
-// 73824 = (.got)[0] = 0x12060
-// 73828 = (.got)[1] = 0x12064
-// DISASM-NEXT: 11000: {{.*}} movl $4294967288, %ecx
-// DISASM-NEXT: 11006: {{.*}} movl %gs:(%ecx), %eax
-// DISASM-NEXT: 11009: {{.*}} movl $4294967288, %eax
-// DISASM-NEXT: 1100e: {{.*}} movl %gs:(%eax), %eax
-// DISASM-NEXT: 11011: {{.*}} addl $4294967288, %ecx
-// DISASM-NEXT: 11017: {{.*}} movl %gs:(%ecx), %eax
-// DISASM-NEXT: 1101a: {{.*}} movl $4294967292, %ecx
-// DISASM-NEXT: 11020: {{.*}} movl %gs:(%ecx), %eax
-// DISASM-NEXT: 11023: {{.*}} movl $4294967292, %eax
-// DISASM-NEXT: 11028: {{.*}} movl %gs:(%eax), %eax
-// DISASM-NEXT: 1102b: {{.*}} addl $4294967292, %ecx
-// DISASM-NEXT: 11031: {{.*}} movl %gs:(%ecx), %eax
-// DISASM-NEXT: 11034: {{.*}} movl 73824, %ecx
-// DISASM-NEXT: 1103a: {{.*}} movl %gs:(%ecx), %eax
-// DISASM-NEXT: 1103d: {{.*}} addl 73828, %ecx
-// DISASM-NEXT: 11043: {{.*}} movl %gs:(%ecx), %eax
+// 73816 = (.got)[0] = 0x12058
+// 73820 = (.got)[1] = 0x1205C
+// DISASM-NEXT: 11000: c7 c1 f8 ff ff ff movl $4294967288, %ecx
+// DISASM-NEXT: 11006: 65 8b 01          movl %gs:(%ecx), %eax
+// DISASM-NEXT: 11009: b8 f8 ff ff ff    movl $4294967288, %eax
+// DISASM-NEXT: 1100e: 65 8b 00          movl %gs:(%eax), %eax
+// DISASM-NEXT: 11011: 81 c1 f8 ff ff ff addl $4294967288, %ecx
+// DISASM-NEXT: 11017: 65 8b 01          movl %gs:(%ecx), %eax
+// DISASM-NEXT: 1101a: c7 c1 fc ff ff ff movl $4294967292, %ecx
+// DISASM-NEXT: 11020: 65 8b 01          movl %gs:(%ecx), %eax
+// DISASM-NEXT: 11023: b8 fc ff ff ff    movl $4294967292, %eax
+// DISASM-NEXT: 11028: 65 8b 00          movl %gs:(%eax), %eax
+// DISASM-NEXT: 1102b: 81 c1 fc ff ff ff addl $4294967292, %ecx
+// DISASM-NEXT: 11031: 65 8b 01          movl %gs:(%ecx), %eax
+// DISASM-NEXT: 11034: 8b 0d 58 20 01 00 movl 73816, %ecx
+// DISASM-NEXT: 1103a: 65 8b 01          movl %gs:(%ecx), %eax
+// DISASM-NEXT: 1103d: 03 0d 5c 20 01 00 addl 73820, %ecx
+// DISASM-NEXT: 11043: 65 8b 01          movl %gs:(%ecx), %eax
 
 .type tlslocal0,@object
 .section .tbss,"awT",@nobits
diff --git a/test/ELF/x86-64-reloc-tpoff32-fpic.s b/test/ELF/x86-64-reloc-tpoff32-fpic.s
new file mode 100644
index 000000000000..5be3dc317012
--- /dev/null
+++ b/test/ELF/x86-64-reloc-tpoff32-fpic.s
@@ -0,0 +1,14 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+# RUN: not ld.lld %t.o -shared -o %t.so 2>&1 | FileCheck %s
+
+# CHECK: relocation R_X86_64_TPOFF32 cannot be used against shared object; recompile with -fPIC
+# CHECK: >>> defined in {{.*}}.o
+# CHECK: >>> referenced by {{.*}}.o:(.tdata+0xC)
+
+.section ".tdata", "awT", @progbits
+.globl var
+var:
+
+movq %fs:0, %rax
+leaq var@TPOFF(%rax),%rax
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index 9cd085398c37..84d35d43f4e8 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -12,6 +12,5 @@ function(add_lld_unittest test_dirname)
   target_link_libraries(${test_dirname} ${LLVM_COMMON_LIBS})
 endfunction()
 
-add_subdirectory(CoreTests)
 add_subdirectory(DriverTests)
 add_subdirectory(MachOTests)
diff --git a/unittests/CoreTests/CMakeLists.txt b/unittests/CoreTests/CMakeLists.txt
deleted file mode 100644
index 9f68f56a6c03..000000000000
--- a/unittests/CoreTests/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-add_lld_unittest(CoreTests
-  ParallelTest.cpp
-  )
-
-target_link_libraries(CoreTests
-  lldCore ${LLVM_PTHREAD_LIB}
-  )
diff --git a/unittests/CoreTests/ParallelTest.cpp b/unittests/CoreTests/ParallelTest.cpp
deleted file mode 100644
index bd8507026a07..000000000000
--- a/unittests/CoreTests/ParallelTest.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-//===- lld/unittest/ParallelTest.cpp --------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief Parallel.h unit tests.
-///
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-#include "lld/Core/Parallel.h"
-#include <array>
-#include <random>
-
-uint32_t array[1024 * 1024];
-
-TEST(Parallel, sort) {
-  std::mt19937 randEngine;
-  std::uniform_int_distribution<uint32_t> dist;
-
-  for (auto &i : array)
-    i = dist(randEngine);
-
-  lld::parallel_sort(std::begin(array), std::end(array));
-  ASSERT_TRUE(std::is_sorted(std::begin(array), std::end(array)));
-}
-
-TEST(Parallel, parallel_for) {
-  // We need to test the case with a TaskSize > 1. We are white-box testing
-  // here. The TaskSize is calculated as (End - Begin) / 1024 at the time of
-  // writing.
-  uint32_t range[2050];
-  std::fill(range, range + 2050, 1);
-  lld::parallel_for(0, 2049, [&range](size_t I) { ++range[I]; });
-
-  uint32_t expected[2049];
-  std::fill(expected, expected + 2049, 2);
-  ASSERT_TRUE(std::equal(range, range + 2049, expected));
-  // Check that we don't write past the end of the requested range.
-  ASSERT_EQ(range[2049], 1u);
-}

From 7af96fb3afd6725a2824a0a5ca5dad34e5e0b056 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 17 May 2017 20:22:39 +0000
Subject: [PATCH 4/9] Vendor import of llvm trunk r303291:
 https://llvm.org/svn/llvm-project/llvm/trunk@303291

---
 include/llvm/ADT/APInt.h                      |    4 +-
 include/llvm/ADT/BitVector.h                  |  313 ++--
 include/llvm/ADT/PostOrderIterator.h          |   33 +-
 include/llvm/ADT/PriorityWorklist.h           |   15 +-
 include/llvm/ADT/SCCIterator.h                |   10 +-
 include/llvm/ADT/Sequence.h                   |   21 +-
 include/llvm/ADT/SetVector.h                  |   22 +-
 include/llvm/ADT/SmallBitVector.h             |   13 +
 include/llvm/ADT/SmallPtrSet.h                |   30 +-
 include/llvm/ADT/SmallVector.h                |   45 +-
 include/llvm/ADT/SparseBitVector.h            |   20 +-
 include/llvm/ADT/SparseMultiSet.h             |   39 +-
 include/llvm/ADT/SparseSet.h                  |   22 +-
 include/llvm/ADT/StringExtras.h               |   22 +-
 include/llvm/ADT/StringMap.h                  |   88 +-
 include/llvm/ADT/StringRef.h                  |   20 +-
 include/llvm/ADT/StringSet.h                  |   17 +-
 include/llvm/ADT/TinyPtrVector.h              |   14 +-
 include/llvm/ADT/UniqueVector.h               |   15 +-
 include/llvm/Analysis/ProfileSummaryInfo.h    |   15 +
 .../llvm/DebugInfo/CodeView/CVTypeVisitor.h   |   32 +-
 .../CodeView/RandomAccessTypeVisitor.h        |   15 -
 include/llvm/DebugInfo/DWARF/DWARFAttribute.h |    4 +-
 .../DebugInfo/DWARF/DWARFDebugArangeSet.h     |   18 +-
 .../llvm/DebugInfo/DWARF/DWARFDebugAranges.h  |    8 +-
 .../DebugInfo/DWARF/DWARFDebugRangeList.h     |   38 +-
 include/llvm/DebugInfo/DWARF/DWARFDie.h       |   44 +-
 include/llvm/DebugInfo/DWARF/DWARFFormValue.h |   11 +-
 include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h  |   16 +-
 include/llvm/DebugInfo/DWARF/DWARFRelocMap.h  |   11 +-
 include/llvm/DebugInfo/DWARF/DWARFUnit.h      |   12 +-
 include/llvm/DebugInfo/PDB/Native/TpiStream.h |    1 +
 include/llvm/IR/IntrinsicsPowerPC.td          |    2 +
 .../Target/GlobalISel/SelectionDAGCompat.td   |    1 +
 lib/Analysis/DependenceAnalysis.cpp           |   33 +-
 lib/Analysis/InlineCost.cpp                   |   42 +-
 lib/Analysis/InstructionSimplify.cpp          |   18 +
 lib/Analysis/ProfileSummaryInfo.cpp           |    2 +-
 lib/Analysis/ScalarEvolution.cpp              |   43 +-
 lib/CodeGen/AggressiveAntiDepBreaker.cpp      |    5 +-
 lib/CodeGen/AsmPrinter/CodeViewDebug.cpp      |    2 +-
 .../AsmPrinter/DbgValueHistoryCalculator.cpp  |    3 +-
 lib/CodeGen/GlobalISel/IRTranslator.cpp       |    5 +
 lib/CodeGen/MachineVerifier.cpp               |    2 +-
 lib/CodeGen/RegAllocGreedy.cpp                |   11 +-
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp      |   28 +-
 lib/CodeGen/SpillPlacement.cpp                |    4 +-
 lib/CodeGen/StackColoring.cpp                 |    6 +-
 lib/CodeGen/TargetLoweringBase.cpp            |    2 +-
 lib/CodeGen/TargetPassConfig.cpp              |   24 +-
 lib/CodeGen/TargetRegisterInfo.cpp            |    3 +-
 lib/DebugInfo/CodeView/CVTypeDumper.cpp       |   22 +-
 lib/DebugInfo/CodeView/CVTypeVisitor.cpp      |   99 +-
 .../CodeView/RandomAccessTypeVisitor.cpp      |   10 +-
 lib/DebugInfo/CodeView/TypeDumpVisitor.cpp    |    3 +-
 lib/DebugInfo/CodeView/TypeStreamMerger.cpp   |   17 +-
 lib/DebugInfo/DWARF/DWARFContext.cpp          |   63 +-
 .../PDB/Native/PDBTypeServerHandler.cpp       |    3 +-
 .../RuntimeDyld/RuntimeDyld.cpp               |   10 +-
 lib/Support/CrashRecoveryContext.cpp          |  130 +-
 lib/Support/Unix/Path.inc                     |   30 +-
 lib/Target/AArch64/AArch64FrameLowering.cpp   |    3 +-
 lib/Target/AArch64/AArch64ISelLowering.cpp    |  341 +---
 lib/Target/AArch64/AArch64ISelLowering.h      |    1 +
 .../AArch64/AArch64TargetTransformInfo.cpp    |   25 +
 .../AArch64/AArch64TargetTransformInfo.h      |    3 +
 lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp      |   30 +-
 lib/Target/AMDGPU/AMDGPUSubtarget.h           |    4 +
 lib/Target/AMDGPU/SIISelLowering.cpp          |    3 +-
 lib/Target/AMDGPU/SIInstrInfo.cpp             |   14 +-
 lib/Target/AMDGPU/SIRegisterInfo.cpp          |   26 +-
 lib/Target/AMDGPU/VOP3Instructions.td         |   20 +-
 lib/Target/ARM/ARMInstructionSelector.cpp     |   15 -
 lib/Target/ARM/Thumb1FrameLowering.cpp        |    3 +-
 lib/Target/Mips/MipsDelaySlotFiller.cpp       |    2 +-
 lib/Target/PowerPC/PPCFrameLowering.cpp       |   23 +-
 lib/Target/PowerPC/PPCISelLowering.cpp        |   45 +-
 lib/Target/PowerPC/PPCISelLowering.h          |    1 +
 lib/Target/PowerPC/PPCInstr64Bit.td           |    4 +
 lib/Target/PowerPC/PPCInstrInfo.cpp           |   13 +
 lib/Target/PowerPC/PPCInstrInfo.td            |   12 +-
 .../SystemZ/SystemZTargetTransformInfo.cpp    |   34 +-
 .../WebAssembly/WebAssemblyRegColoring.cpp    |    3 +-
 .../WebAssembly/known_gcc_test_failures.txt   |    3 -
 lib/Target/X86/X86.td                         |    3 -
 lib/Target/X86/X86FixupLEAs.cpp               |  269 +--
 lib/Target/X86/X86InstructionSelector.cpp     |   66 +
 lib/Target/X86/X86LegalizerInfo.cpp           |    5 +
 lib/Target/X86/X86Subtarget.h                 |    6 -
 lib/Target/X86/X86TargetMachine.cpp           |    2 -
 lib/Target/X86/X86TargetTransformInfo.cpp     |   18 +
 lib/Transforms/Coroutines/CoroFrame.cpp       |   28 +-
 .../InstCombine/InstCombineInternal.h         |   21 +
 .../InstCombine/InstructionCombining.cpp      |   47 +-
 lib/Transforms/Scalar/LICM.cpp                |    2 +-
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp  |   16 +-
 lib/Transforms/Scalar/LoopStrengthReduce.cpp  |    3 +-
 lib/Transforms/Scalar/NewGVN.cpp              |   68 +-
 lib/Transforms/Scalar/Reassociate.cpp         |    2 +-
 lib/Transforms/Scalar/SimpleLoopUnswitch.cpp  |   37 +-
 test/Analysis/CostModel/SystemZ/div-pow2.ll   |  154 ++
 test/Analysis/CostModel/X86/bitreverse.ll     |   69 +
 test/Analysis/CostModel/X86/ctbits-cost.ll    |  587 -------
 test/Analysis/CostModel/X86/ctlz.ll           |  233 +++
 test/Analysis/CostModel/X86/ctpop.ll          |  133 ++
 test/Analysis/CostModel/X86/cttz.ll           |  233 +++
 test/CodeGen/AArch64/aarch64-addv.ll          |   63 +-
 test/CodeGen/AArch64/aarch64-minmaxv.ll       |  424 +----
 test/CodeGen/AArch64/arm64-vabs.ll            |   42 +-
 test/CodeGen/AArch64/ldst-zero.ll             |   23 +-
 test/CodeGen/AArch64/misched-stp.ll           |   35 +-
 test/CodeGen/AMDGPU/fmax3.ll                  |  101 +-
 test/CodeGen/AMDGPU/fmin3.ll                  |  100 +-
 test/CodeGen/AMDGPU/global-constant.ll        |    4 +-
 test/CodeGen/AMDGPU/immv216.ll                |    6 +-
 test/CodeGen/AMDGPU/max3.ll                   |   91 +-
 test/CodeGen/AMDGPU/min3.ll                   |  131 +-
 test/CodeGen/AMDGPU/packed-op-sel.ll          |  266 +++
 .../CodeGen/ARM/2011-02-04-AntidepMultidef.ll |   16 +-
 .../ARM/2012-10-04-AAPCS-byval-align8.ll      |    2 +-
 test/CodeGen/ARM/dag-combine-ldst.ll          |    2 +-
 test/CodeGen/MSP430/vararg.ll                 |    1 -
 test/CodeGen/Mips/msa/bmzi_bmnzi.ll           |    8 +-
 test/CodeGen/PowerPC/atomic-2.ll              |   14 +-
 test/CodeGen/PowerPC/atomics-indexed.ll       |   14 +-
 test/CodeGen/PowerPC/atomics-regression.ll    |   64 +-
 test/CodeGen/PowerPC/atomics.ll               |   14 +-
 test/CodeGen/PowerPC/ppcf128sf.ll             |    8 +-
 test/CodeGen/PowerPC/save-bp.ll               |   54 +
 test/CodeGen/PowerPC/save-cr-ppc32svr4.ll     |   46 +
 test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll   |   57 +
 test/CodeGen/SPARC/32abi.ll                   |   16 +-
 test/CodeGen/SPARC/64abi.ll                   |   18 +-
 test/CodeGen/SystemZ/swift-return.ll          |    6 +-
 test/CodeGen/Thumb/stack-access.ll            |   10 +-
 test/CodeGen/Thumb2/ldr-str-imm12.ll          |    4 +-
 test/CodeGen/X86/GlobalISel/add-scalar.ll     |   94 +-
 test/CodeGen/X86/GlobalISel/legalize-add.mir  |   69 +-
 .../X86/GlobalISel/regbankselect-X32.mir      |   36 +
 .../CodeGen/X86/GlobalISel/select-add-x32.mir |   63 +
 test/CodeGen/X86/arg-copy-elide.ll            |    7 +-
 test/CodeGen/X86/leaFixup32.mir               |  508 ------
 test/CodeGen/X86/leaFixup64.mir               | 1041 ------------
 test/CodeGen/X86/nontemporal.ll               |   72 +-
 test/CodeGen/X86/psubus.ll                    | 1443 ++++++++---------
 test/CodeGen/X86/store-narrow.ll              |    5 +-
 test/CodeGen/X86/swift-return.ll              |    6 +-
 test/CodeGen/X86/win32-spill-xmm.ll           |    2 +-
 test/CodeGen/X86/win64_sibcall.ll             |    4 +-
 test/CodeGen/X86/win64_vararg.ll              |    4 +-
 test/CodeGen/X86/x86-64-ms_abi-vararg.ll      |    4 +-
 .../RuntimeDyld/X86/ELF_x86-64_debug_frame.s  |   20 +
 test/Feature/optnone-llc.ll                   |    1 +
 test/MC/AMDGPU/vop3-gfx9.s                    |   24 +
 test/TableGen/GlobalISelEmitter.td            |   75 +
 .../Transforms/Coroutines/coro-catchswitch.ll |   88 +
 test/Transforms/Inline/inline-hot-callee.ll   |   10 +-
 .../InstCombine/canonicalize_branch.ll        |  529 +++++-
 test/Transforms/InstCombine/debuginfo-skip.ll |   44 +
 test/Transforms/InstSimplify/AndOrXor.ll      |   12 +-
 .../LoopVectorize/AArch64/pr33053.ll          |   56 +
 .../AArch64/reduction-small-size.ll           |   26 +-
 test/Transforms/NewGVN/pr32934.ll             |    1 -
 .../SLPVectorizer/AArch64/gather-root.ll      |   40 +-
 tools/llvm-pdbdump/Analyze.cpp                |   15 +-
 tools/llvm-pdbdump/LLVMOutputStyle.cpp        |   32 +-
 tools/llvm-pdbdump/PdbYaml.cpp                |   14 +-
 tools/llvm-pdbdump/YamlTypeDumper.cpp         |   78 +-
 unittests/ADT/BitVectorTest.cpp               |  184 ++-
 unittests/Analysis/ProfileSummaryInfoTest.cpp |    8 +
 .../CodeView/RandomAccessVisitorTest.cpp      |    1 +
 .../DebugInfo/PDB/TypeServerHandlerTest.cpp   |   20 +-
 unittests/Support/BinaryStreamTest.cpp        |   29 +-
 unittests/Support/CMakeLists.txt              |    1 +
 unittests/Support/CrashRecoveryTest.cpp       |   83 +
 utils/TableGen/AsmMatcherEmitter.cpp          |    4 +-
 utils/TableGen/GlobalISelEmitter.cpp          |  124 +-
 utils/lit/lit/main.py                         |   12 +-
 utils/lit/lit/run.py                          |  103 +-
 179 files changed, 5516 insertions(+), 5357 deletions(-)
 create mode 100644 test/Analysis/CostModel/SystemZ/div-pow2.ll
 delete mode 100644 test/Analysis/CostModel/X86/ctbits-cost.ll
 create mode 100644 test/Analysis/CostModel/X86/ctlz.ll
 create mode 100644 test/Analysis/CostModel/X86/ctpop.ll
 create mode 100644 test/Analysis/CostModel/X86/cttz.ll
 create mode 100644 test/CodeGen/AMDGPU/packed-op-sel.ll
 create mode 100644 test/CodeGen/PowerPC/save-bp.ll
 create mode 100644 test/CodeGen/PowerPC/save-cr-ppc32svr4.ll
 create mode 100644 test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll
 create mode 100644 test/CodeGen/X86/GlobalISel/regbankselect-X32.mir
 create mode 100644 test/CodeGen/X86/GlobalISel/select-add-x32.mir
 delete mode 100644 test/CodeGen/X86/leaFixup32.mir
 delete mode 100644 test/CodeGen/X86/leaFixup64.mir
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/ELF_x86-64_debug_frame.s
 create mode 100644 test/Transforms/Coroutines/coro-catchswitch.ll
 create mode 100644 test/Transforms/InstCombine/debuginfo-skip.ll
 create mode 100644 test/Transforms/LoopVectorize/AArch64/pr33053.ll
 create mode 100644 unittests/Support/CrashRecoveryTest.cpp

diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 94fbd1a29bf9..894e5571f8ad 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -1067,9 +1067,7 @@ class LLVM_NODISCARD APInt {
   /// \returns the bit value at bitPosition
   bool operator[](unsigned bitPosition) const {
     assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
-    return (maskBit(bitPosition) &
-            (isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)])) !=
-           0;
+    return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
   }
 
   /// @}
diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h
index 4a2af7cd68a6..e68ef5f53d10 100644
--- a/include/llvm/ADT/BitVector.h
+++ b/include/llvm/ADT/BitVector.h
@@ -15,6 +15,7 @@
 #define LLVM_ADT_BITVECTOR_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/MathExtras.h"
 #include <algorithm>
 #include <cassert>
@@ -26,6 +27,50 @@
 
 namespace llvm {
 
+/// ForwardIterator for the bits that are set.
+/// Iterators get invalidated when resize / reserve is called.
+template <typename BitVectorT> class const_set_bits_iterator_impl {
+  const BitVectorT &Parent;
+  int Current = 0;
+
+  void advance() {
+    assert(Current != -1 && "Trying to advance past end.");
+    Current = Parent.find_next(Current);
+  }
+
+public:
+  const_set_bits_iterator_impl(const BitVectorT &Parent, int Current)
+      : Parent(Parent), Current(Current) {}
+  explicit const_set_bits_iterator_impl(const BitVectorT &Parent)
+      : const_set_bits_iterator_impl(Parent, Parent.find_first()) {}
+  const_set_bits_iterator_impl(const const_set_bits_iterator_impl &) = default;
+
+  const_set_bits_iterator_impl operator++(int) {
+    auto Prev = *this;
+    advance();
+    return Prev;
+  }
+
+  const_set_bits_iterator_impl &operator++() {
+    advance();
+    return *this;
+  }
+
+  unsigned operator*() const { return Current; }
+
+  bool operator==(const const_set_bits_iterator_impl &Other) const {
+    assert(&Parent == &Other.Parent &&
+           "Comparing iterators from different BitVectors");
+    return Current == Other.Current;
+  }
+
+  bool operator!=(const const_set_bits_iterator_impl &Other) const {
+    assert(&Parent == &Other.Parent &&
+           "Comparing iterators from different BitVectors");
+    return Current != Other.Current;
+  }
+};
+
 class BitVector {
   typedef unsigned long BitWord;
 
@@ -73,6 +118,18 @@ class BitVector {
     }
   };
 
+  typedef const_set_bits_iterator_impl<BitVector> const_set_bits_iterator;
+  typedef const_set_bits_iterator set_iterator;
+
+  const_set_bits_iterator set_bits_begin() const {
+    return const_set_bits_iterator(*this);
+  }
+  const_set_bits_iterator set_bits_end() const {
+    return const_set_bits_iterator(*this, -1);
+  }
+  iterator_range<const_set_bits_iterator> set_bits() const {
+    return make_range(set_bits_begin(), set_bits_end());
+  }
 
   /// BitVector default ctor - Creates an empty bitvector.
   BitVector() : Size(0) {}
@@ -146,138 +203,164 @@ class BitVector {
     return !any();
   }
 
-  /// find_first - Returns the index of the first set bit, -1 if none
-  /// of the bits are set.
-  int find_first() const {
-    for (unsigned i = 0; i < NumBitWords(size()); ++i)
-      if (Bits[i] != 0)
-        return i * BITWORD_SIZE + countTrailingZeros(Bits[i]);
+  /// find_first_in - Returns the index of the first set bit in the range
+  /// [Begin, End).  Returns -1 if all bits in the range are unset.
+  int find_first_in(unsigned Begin, unsigned End) const {
+    assert(Begin <= End && End <= Size);
+    if (Begin == End)
+      return -1;
+
+    unsigned FirstWord = Begin / BITWORD_SIZE;
+    unsigned LastWord = (End - 1) / BITWORD_SIZE;
+
+    // Check subsequent words.
+    for (unsigned i = FirstWord; i <= LastWord; ++i) {
+      BitWord Copy = Bits[i];
+
+      if (i == FirstWord) {
+        unsigned FirstBit = Begin % BITWORD_SIZE;
+        Copy &= maskTrailingZeros<BitWord>(FirstBit);
+      }
+
+      if (i == LastWord) {
+        unsigned LastBit = (End - 1) % BITWORD_SIZE;
+        Copy &= maskTrailingOnes<BitWord>(LastBit + 1);
+      }
+      if (Copy != 0)
+        return i * BITWORD_SIZE + countTrailingZeros(Copy);
+    }
     return -1;
   }
 
+  /// find_last_in - Returns the index of the last set bit in the range
+  /// [Begin, End).  Returns -1 if all bits in the range are unset.
+  int find_last_in(unsigned Begin, unsigned End) const {
+    assert(Begin <= End && End <= Size);
+    if (Begin == End)
+      return -1;
+
+    unsigned LastWord = (End - 1) / BITWORD_SIZE;
+    unsigned FirstWord = Begin / BITWORD_SIZE;
+
+    for (unsigned i = LastWord + 1; i >= FirstWord + 1; --i) {
+      unsigned CurrentWord = i - 1;
+
+      BitWord Copy = Bits[CurrentWord];
+      if (CurrentWord == LastWord) {
+        unsigned LastBit = (End - 1) % BITWORD_SIZE;
+        Copy &= maskTrailingOnes<BitWord>(LastBit + 1);
+      }
+
+      if (CurrentWord == FirstWord) {
+        unsigned FirstBit = Begin % BITWORD_SIZE;
+        Copy &= maskTrailingZeros<BitWord>(FirstBit);
+      }
+
+      if (Copy != 0)
+        return (CurrentWord + 1) * BITWORD_SIZE - countLeadingZeros(Copy) - 1;
+    }
+
+    return -1;
+  }
+
+  /// find_first_unset_in - Returns the index of the first unset bit in the
+  /// range [Begin, End).  Returns -1 if all bits in the range are set.
+  int find_first_unset_in(unsigned Begin, unsigned End) const {
+    assert(Begin <= End && End <= Size);
+    if (Begin == End)
+      return -1;
+
+    unsigned FirstWord = Begin / BITWORD_SIZE;
+    unsigned LastWord = (End - 1) / BITWORD_SIZE;
+
+    // Check subsequent words.
+    for (unsigned i = FirstWord; i <= LastWord; ++i) {
+      BitWord Copy = Bits[i];
+
+      if (i == FirstWord) {
+        unsigned FirstBit = Begin % BITWORD_SIZE;
+        Copy |= maskTrailingOnes<BitWord>(FirstBit);
+      }
+
+      if (i == LastWord) {
+        unsigned LastBit = (End - 1) % BITWORD_SIZE;
+        Copy |= maskTrailingZeros<BitWord>(LastBit + 1);
+      }
+      if (Copy != ~0UL) {
+        unsigned Result = i * BITWORD_SIZE + countTrailingOnes(Copy);
+        return Result < size() ? Result : -1;
+      }
+    }
+    return -1;
+  }
+
+  /// find_last_unset_in - Returns the index of the last unset bit in the
+  /// range [Begin, End).  Returns -1 if all bits in the range are set.
+  int find_last_unset_in(unsigned Begin, unsigned End) const {
+    assert(Begin <= End && End <= Size);
+    if (Begin == End)
+      return -1;
+
+    unsigned LastWord = (End - 1) / BITWORD_SIZE;
+    unsigned FirstWord = Begin / BITWORD_SIZE;
+
+    for (unsigned i = LastWord + 1; i >= FirstWord + 1; --i) {
+      unsigned CurrentWord = i - 1;
+
+      BitWord Copy = Bits[CurrentWord];
+      if (CurrentWord == LastWord) {
+        unsigned LastBit = (End - 1) % BITWORD_SIZE;
+        Copy |= maskTrailingZeros<BitWord>(LastBit + 1);
+      }
+
+      if (CurrentWord == FirstWord) {
+        unsigned FirstBit = Begin % BITWORD_SIZE;
+        Copy |= maskTrailingOnes<BitWord>(FirstBit);
+      }
+
+      if (Copy != ~0UL) {
+        unsigned Result =
+            (CurrentWord + 1) * BITWORD_SIZE - countLeadingOnes(Copy) - 1;
+        return Result < Size ? Result : -1;
+      }
+    }
+    return -1;
+  }
+
+  /// find_first - Returns the index of the first set bit, -1 if none
+  /// of the bits are set.
+  int find_first() const { return find_first_in(0, Size); }
+
   /// find_last - Returns the index of the last set bit, -1 if none of the bits
   /// are set.
-  int find_last() const {
-    if (Size == 0)
-      return -1;
-
-    unsigned N = NumBitWords(size());
-    assert(N > 0);
-
-    unsigned i = N - 1;
-    while (i > 0 && Bits[i] == BitWord(0))
-      --i;
-
-    return int((i + 1) * BITWORD_SIZE - countLeadingZeros(Bits[i])) - 1;
-  }
-
-  /// find_first_unset - Returns the index of the first unset bit, -1 if all
-  /// of the bits are set.
-  int find_first_unset() const {
-    for (unsigned i = 0; i < NumBitWords(size()); ++i)
-      if (Bits[i] != ~0UL) {
-        unsigned Result = i * BITWORD_SIZE + countTrailingOnes(Bits[i]);
-        return Result < size() ? Result : -1;
-      }
-    return -1;
-  }
-
-  /// find_last_unset - Returns the index of the last unset bit, -1 if all of
-  /// the bits are set.
-  int find_last_unset() const {
-    if (Size == 0)
-      return -1;
-
-    const unsigned N = NumBitWords(size());
-    assert(N > 0);
-
-    unsigned i = N - 1;
-    BitWord W = Bits[i];
-
-    // The last word in the BitVector has some unused bits, so we need to set
-    // them all to 1 first.  Set them all to 1 so they don't get treated as
-    // valid unset bits.
-    unsigned UnusedCount = BITWORD_SIZE - size() % BITWORD_SIZE;
-    W |= maskLeadingOnes<BitWord>(UnusedCount);
-
-    while (W == ~BitWord(0) && --i > 0)
-      W = Bits[i];
-
-    return int((i + 1) * BITWORD_SIZE - countLeadingOnes(W)) - 1;
-  }
+  int find_last() const { return find_last_in(0, Size); }
 
   /// find_next - Returns the index of the next set bit following the
   /// "Prev" bit. Returns -1 if the next set bit is not found.
-  int find_next(unsigned Prev) const {
-    ++Prev;
-    if (Prev >= Size)
-      return -1;
+  int find_next(unsigned Prev) const { return find_first_in(Prev + 1, Size); }
 
-    unsigned WordPos = Prev / BITWORD_SIZE;
-    unsigned BitPos = Prev % BITWORD_SIZE;
-    BitWord Copy = Bits[WordPos];
-    // Mask off previous bits.
-    Copy &= maskTrailingZeros<BitWord>(BitPos);
+  /// find_prev - Returns the index of the last set bit that precedes the
+  /// bit at \p PriorTo.  Returns -1 if all previous bits are unset.
+  int find_prev(unsigned PriorTo) const { return find_last_in(0, PriorTo); }
 
-    if (Copy != 0)
-      return WordPos * BITWORD_SIZE + countTrailingZeros(Copy);
-
-    // Check subsequent words.
-    for (unsigned i = WordPos+1; i < NumBitWords(size()); ++i)
-      if (Bits[i] != 0)
-        return i * BITWORD_SIZE + countTrailingZeros(Bits[i]);
-    return -1;
-  }
+  /// find_first_unset - Returns the index of the first unset bit, -1 if all
+  /// of the bits are set.
+  int find_first_unset() const { return find_first_unset_in(0, Size); }
 
   /// find_next_unset - Returns the index of the next unset bit following the
   /// "Prev" bit.  Returns -1 if all remaining bits are set.
   int find_next_unset(unsigned Prev) const {
-    ++Prev;
-    if (Prev >= Size)
-      return -1;
-
-    unsigned WordPos = Prev / BITWORD_SIZE;
-    unsigned BitPos = Prev % BITWORD_SIZE;
-    BitWord Copy = Bits[WordPos];
-    // Mask in previous bits.
-    BitWord Mask = (1 << BitPos) - 1;
-    Copy |= Mask;
-
-    if (Copy != ~0UL)
-      return next_unset_in_word(WordPos, Copy);
-
-    // Check subsequent words.
-    for (unsigned i = WordPos + 1; i < NumBitWords(size()); ++i)
-      if (Bits[i] != ~0UL)
-        return next_unset_in_word(i, Bits[i]);
-    return -1;
+    return find_first_unset_in(Prev + 1, Size);
   }
 
-  /// find_prev - Returns the index of the first set bit that precedes the
-  /// the bit at \p PriorTo.  Returns -1 if all previous bits are unset.
-  int find_prev(unsigned PriorTo) const {
-    if (PriorTo == 0)
-      return -1;
+  /// find_last_unset - Returns the index of the last unset bit, -1 if all of
+  /// the bits are set.
+  int find_last_unset() const { return find_last_unset_in(0, Size); }
 
-    --PriorTo;
-
-    unsigned WordPos = PriorTo / BITWORD_SIZE;
-    unsigned BitPos = PriorTo % BITWORD_SIZE;
-    BitWord Copy = Bits[WordPos];
-    // Mask off next bits.
-    Copy &= maskTrailingOnes<BitWord>(BitPos + 1);
-
-    if (Copy != 0)
-      return (WordPos + 1) * BITWORD_SIZE - countLeadingZeros(Copy) - 1;
-
-    // Check previous words.
-    for (unsigned i = 1; i <= WordPos; ++i) {
-      unsigned Index = WordPos - i;
-      if (Bits[Index] == 0)
-        continue;
-      return (Index + 1) * BITWORD_SIZE - countLeadingZeros(Bits[Index]) - 1;
-    }
-    return -1;
+  /// find_prev_unset - Returns the index of the last unset bit that precedes
+  /// the bit at \p PriorTo.  Returns -1 if all previous bits are set.
+  int find_prev_unset(unsigned PriorTo) const {
+    return find_last_unset_in(0, PriorTo);
+  }
 
   /// clear - Removes all bits from the bitvector. Does not change capacity.
diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h
index 8fc08eb252eb..a179d29956b1 100644
--- a/include/llvm/ADT/PostOrderIterator.h
+++ b/include/llvm/ADT/PostOrderIterator.h
@@ -96,24 +96,14 @@ template <class GraphT,
 class po_iterator
     : public std::iterator<std::forward_iterator_tag, typename GT::NodeRef>,
       public po_iterator_storage<SetType, ExtStorage> {
-  typedef std::iterator<std::forward_iterator_tag, typename GT::NodeRef> super;
-  typedef typename GT::NodeRef NodeRef;
-  typedef typename GT::ChildIteratorType ChildItTy;
+  using super = std::iterator<std::forward_iterator_tag, typename GT::NodeRef>;
+  using NodeRef = typename GT::NodeRef;
+  using ChildItTy = typename GT::ChildIteratorType;
 
   // VisitStack - Used to maintain the ordering.  Top = current block
   // First element is basic block pointer, second is the 'next child' to visit
   std::vector<std::pair<NodeRef, ChildItTy>> VisitStack;
 
-  void traverseChild() {
-    while (VisitStack.back().second != GT::child_end(VisitStack.back().first)) {
-      NodeRef BB = *VisitStack.back().second++;
-      if (this->insertEdge(Optional<NodeRef>(VisitStack.back().first), BB)) {
-        // If the block is not visited...
-        VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
-      }
-    }
-  }
-
   po_iterator(NodeRef BB) {
     this->insertEdge(Optional<NodeRef>(), BB);
     VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
@@ -134,8 +124,18 @@ class po_iterator
       : po_iterator_storage<SetType, ExtStorage>(S) {
   } // End is when stack is empty.
 
+  void traverseChild() {
+    while (VisitStack.back().second != GT::child_end(VisitStack.back().first)) {
+      NodeRef BB = *VisitStack.back().second++;
+      if (this->insertEdge(Optional<NodeRef>(VisitStack.back().first), BB)) {
+        // If the block is not visited...
+        VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
+      }
+    }
+  }
+
 public:
-  typedef typename super::pointer pointer;
+  using pointer = typename super::pointer;
 
   // Provide static "constructors"...
   static po_iterator begin(GraphT G) {
@@ -286,7 +286,8 @@ inverse_post_order_ext(const T &G, SetType &S) {
 
 template<class GraphT, class GT = GraphTraits<GraphT>>
 class ReversePostOrderTraversal {
-  typedef typename GT::NodeRef NodeRef;
+  using NodeRef = typename GT::NodeRef;
+
   std::vector<NodeRef> Blocks; // Block list in normal PO order
 
   void Initialize(NodeRef BB) {
@@ -294,7 +295,7 @@ class ReversePostOrderTraversal {
   }
 
 public:
-  typedef typename std::vector<NodeRef>::reverse_iterator rpo_iterator;
+  using rpo_iterator = typename std::vector<NodeRef>::reverse_iterator;
 
   ReversePostOrderTraversal(GraphT G) { Initialize(GT::getEntryNode(G)); }
 
diff --git a/include/llvm/ADT/PriorityWorklist.h b/include/llvm/ADT/PriorityWorklist.h
index 3198dd438700..35891e931801 100644
--- a/include/llvm/ADT/PriorityWorklist.h
+++ b/include/llvm/ADT/PriorityWorklist.h
@@ -17,13 +17,14 @@
 #define LLVM_ADT_PRIORITYWORKLIST_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Compiler.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
+#include <iterator>
+#include <type_traits>
 #include <vector>
 
 namespace llvm {
@@ -55,11 +56,11 @@ template <typename T, typename VectorT = std::vector<T>,
           typename MapT = DenseMap<T, ptrdiff_t>>
 class PriorityWorklist {
 public:
-  typedef T value_type;
-  typedef T key_type;
-  typedef T& reference;
-  typedef const T& const_reference;
-  typedef typename MapT::size_type size_type;
+  using value_type = T;
+  using key_type = T;
+  using reference = T&;
+  using const_reference = const T&;
+  using size_type = typename MapT::size_type;
 
   /// Construct an empty PriorityWorklist
   PriorityWorklist() = default;
diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h
index 9a8a7b168fce..734a58f87da2 100644
--- a/include/llvm/ADT/SCCIterator.h
+++ b/include/llvm/ADT/SCCIterator.h
@@ -1,4 +1,4 @@
-//===---- ADT/SCCIterator.h - Strongly Connected Comp. Iter. ----*- C++ -*-===//
+//===- ADT/SCCIterator.h - Strongly Connected Comp. Iter. -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -43,10 +43,10 @@ template <class GraphT, class GT = GraphTraits<GraphT>>
 class scc_iterator : public iterator_facade_base<
                          scc_iterator<GraphT, GT>, std::forward_iterator_tag,
                          const std::vector<typename GT::NodeRef>, ptrdiff_t> {
-  typedef typename GT::NodeRef NodeRef;
-  typedef typename GT::ChildIteratorType ChildItTy;
-  typedef std::vector<NodeRef> SccTy;
-  typedef typename scc_iterator::reference reference;
+  using NodeRef = typename GT::NodeRef;
+  using ChildItTy = typename GT::ChildIteratorType;
+  using SccTy = std::vector<NodeRef>;
+  using reference = typename scc_iterator::reference;
 
   /// Element of VisitStack during DFS.
   struct StackElement {
diff --git a/include/llvm/ADT/Sequence.h b/include/llvm/ADT/Sequence.h
index 5d36831cc128..3d4a897bf9a9 100644
--- a/include/llvm/ADT/Sequence.h
+++ b/include/llvm/ADT/Sequence.h
@@ -13,27 +13,31 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_ADT_SEQ_H
-#define LLVM_ADT_SEQ_H
+#ifndef LLVM_ADT_SEQUENCE_H
+#define LLVM_ADT_SEQUENCE_H
 
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
+#include <algorithm>
+#include <iterator>
+#include <utility>
 
 namespace llvm {
 
 namespace detail {
+
 template <typename ValueT>
 class value_sequence_iterator
     : public iterator_facade_base<value_sequence_iterator<ValueT>,
                                   std::random_access_iterator_tag,
                                   const ValueT> {
-  typedef typename value_sequence_iterator::iterator_facade_base BaseT;
+  using BaseT = typename value_sequence_iterator::iterator_facade_base;
 
   ValueT Value;
 
 public:
-  typedef typename BaseT::difference_type difference_type;
-  typedef typename BaseT::reference reference;
+  using difference_type = typename BaseT::difference_type;
+  using reference = typename BaseT::reference;
 
   value_sequence_iterator() = default;
   value_sequence_iterator(const value_sequence_iterator &) = default;
@@ -65,7 +69,8 @@ class value_sequence_iterator
 
   reference operator*() const { return Value; }
 };
-} // End detail namespace.
+
+} // end namespace detail
 
 template <typename ValueT>
 iterator_range<detail::value_sequence_iterator<ValueT>> seq(ValueT Begin,
@@ -74,6 +79,6 @@ iterator_range<detail::value_sequence_iterator<ValueT>> seq(ValueT Begin,
                     detail::value_sequence_iterator<ValueT>(End));
 }
 
-}
+} // end namespace llvm
 
-#endif
+#endif // LLVM_ADT_SEQUENCE_H
diff --git a/include/llvm/ADT/SetVector.h b/include/llvm/ADT/SetVector.h
index 13378aa3a04e..04ed52fc543f 100644
--- a/include/llvm/ADT/SetVector.h
+++ b/include/llvm/ADT/SetVector.h
@@ -40,17 +40,17 @@ template <typename T, typename Vector = std::vector<T>,
           typename Set = DenseSet<T>>
 class SetVector {
 public:
-  typedef T value_type;
-  typedef T key_type;
-  typedef T& reference;
-  typedef const T& const_reference;
-  typedef Set set_type;
-  typedef Vector vector_type;
-  typedef typename vector_type::const_iterator iterator;
-  typedef typename vector_type::const_iterator const_iterator;
-  typedef typename vector_type::const_reverse_iterator reverse_iterator;
-  typedef typename vector_type::const_reverse_iterator const_reverse_iterator;
-  typedef typename vector_type::size_type size_type;
+  using value_type = T;
+  using key_type = T;
+  using reference = T&;
+  using const_reference = const T&;
+  using set_type = Set;
+  using vector_type = Vector;
+  using iterator = typename vector_type::const_iterator;
+  using const_iterator = typename vector_type::const_iterator;
+  using reverse_iterator = typename vector_type::const_reverse_iterator;
+  using const_reverse_iterator = typename vector_type::const_reverse_iterator;
+  using size_type = typename vector_type::size_type;
 
   /// \brief Construct an empty SetVector
   SetVector() = default;
diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h
index 0eeacc162543..0ff427066959 100644
--- a/include/llvm/ADT/SmallBitVector.h
+++ b/include/llvm/ADT/SmallBitVector.h
@@ -134,6 +134,19 @@ class SmallBitVector {
   }
 
 public:
+  typedef const_set_bits_iterator_impl<SmallBitVector> const_set_bits_iterator;
+  typedef const_set_bits_iterator set_iterator;
+
+  const_set_bits_iterator set_bits_begin() const {
+    return const_set_bits_iterator(*this);
+  }
+  const_set_bits_iterator set_bits_end() const {
+    return const_set_bits_iterator(*this, -1);
+  }
+  iterator_range<const_set_bits_iterator> set_bits() const {
+    return make_range(set_bits_begin(), set_bits_end());
+  }
+
   /// Creates an empty bitvector.
   SmallBitVector() : X(1) {}
 
diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index 196ab6338047..b49d216e0b6e 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -27,15 +27,13 @@
 #include <iterator>
 #include <utility>
 
-#if LLVM_ENABLE_ABI_BREAKING_CHECKS
 namespace llvm {
+
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
 template <class T = void> struct ReverseIterate { static bool value; };
 template <class T> bool ReverseIterate<T>::value = false;
-}
 #endif
 
-namespace llvm {
-
 /// SmallPtrSetImplBase - This is the common code shared among all the
 /// SmallPtrSet<>'s, which is almost everything.  SmallPtrSet has two modes, one
 /// for small and one for large sets.
@@ -92,7 +90,7 @@ class SmallPtrSetImplBase {
   }
 
 public:
-  typedef unsigned size_type;
+  using size_type = unsigned;
 
   SmallPtrSetImplBase &operator=(const SmallPtrSetImplBase &) = delete;
 
@@ -273,14 +271,14 @@ class SmallPtrSetIteratorImpl {
 /// SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet.
 template<typename PtrTy>
 class SmallPtrSetIterator : public SmallPtrSetIteratorImpl {
-  typedef PointerLikeTypeTraits<PtrTy> PtrTraits;
+  using PtrTraits = PointerLikeTypeTraits<PtrTy>;
 
 public:
-  typedef PtrTy                     value_type;
-  typedef PtrTy                     reference;
-  typedef PtrTy                     pointer;
-  typedef std::ptrdiff_t            difference_type;
-  typedef std::forward_iterator_tag iterator_category;
+  using value_type = PtrTy;
+  using reference = PtrTy;
+  using pointer = PtrTy;
+  using difference_type = std::ptrdiff_t;
+  using iterator_category = std::forward_iterator_tag;
 
   explicit SmallPtrSetIterator(const void *const *BP, const void *const *E)
     : SmallPtrSetIteratorImpl(BP, E) {}
@@ -351,8 +349,8 @@ struct RoundUpToPowerOfTwo {
 template <typename PtrType>
 class SmallPtrSetImpl : public SmallPtrSetImplBase {
   using ConstPtrType = typename add_const_past_pointer<PtrType>::type;
-  typedef PointerLikeTypeTraits<PtrType> PtrTraits;
-  typedef PointerLikeTypeTraits<ConstPtrType> ConstPtrTraits;
+  using PtrTraits = PointerLikeTypeTraits<PtrType>;
+  using ConstPtrTraits = PointerLikeTypeTraits<ConstPtrType>;
 
 protected:
   // Constructors that forward to the base.
@@ -365,8 +363,8 @@ class SmallPtrSetImpl : public SmallPtrSetImplBase {
       : SmallPtrSetImplBase(SmallStorage, SmallSize) {}
 
 public:
-  typedef SmallPtrSetIterator<PtrType> iterator;
-  typedef SmallPtrSetIterator<PtrType> const_iterator;
+  using iterator = SmallPtrSetIterator<PtrType>;
+  using const_iterator = SmallPtrSetIterator<PtrType>;
 
   SmallPtrSetImpl(const SmallPtrSetImpl &) = delete;
 
@@ -431,7 +429,7 @@ class SmallPtrSet : public SmallPtrSetImpl<PtrType> {
   // DenseSet<> instead if you expect many elements in the set.
   static_assert(SmallSize <= 32, "SmallSize should be small");
 
-  typedef SmallPtrSetImpl<PtrType> BaseT;
+  using BaseT = SmallPtrSetImpl<PtrType>;
 
   // Make sure that SmallSize is a power of two, round up if not.
   enum { SmallSizePowTwo = RoundUpToPowerOfTwo<SmallSize>::Val };
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index b9588214023c..bd24eab93b50 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -71,7 +71,7 @@ class SmallVectorTemplateCommon : public SmallVectorBase {
   // Allocate raw space for N elements of type T.  If T has a ctor or dtor, we
   // don't want it to be automatically run, so we need to represent the space as
   // something else.  Use an array of char of sufficient alignment.
-  typedef AlignedCharArrayUnion<T> U;
+  using U = AlignedCharArrayUnion<T>;
   U FirstEl;
   // Space after 'FirstEl' is clobbered, do not add any instance vars after it.
 
@@ -96,19 +96,19 @@ class SmallVectorTemplateCommon : public SmallVectorBase {
   void setEnd(T *P) { this->EndX = P; }
 
 public:
-  typedef size_t size_type;
-  typedef ptrdiff_t difference_type;
-  typedef T value_type;
-  typedef T *iterator;
-  typedef const T *const_iterator;
+  using size_type = size_t;
+  using difference_type = ptrdiff_t;
+  using value_type = T;
+  using iterator = T *;
+  using const_iterator = const T *;
 
-  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
-  typedef std::reverse_iterator<iterator> reverse_iterator;
+  using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+  using reverse_iterator = std::reverse_iterator<iterator>;
 
-  typedef T &reference;
-  typedef const T &const_reference;
-  typedef T *pointer;
-  typedef const T *const_pointer;
+  using reference = T &;
+  using const_reference = const T &;
+  using pointer = T *;
+  using const_pointer = const T *;
 
   // forward iterator creation methods.
   LLVM_ATTRIBUTE_ALWAYS_INLINE
@@ -319,12 +319,12 @@ class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
 /// reduce code duplication based on the SmallVector 'N' template parameter.
 template <typename T>
 class SmallVectorImpl : public SmallVectorTemplateBase<T, isPodLike<T>::value> {
-  typedef SmallVectorTemplateBase<T, isPodLike<T>::value > SuperClass;
+  using SuperClass = SmallVectorTemplateBase<T, isPodLike<T>::value>;
 
 public:
-  typedef typename SuperClass::iterator iterator;
-  typedef typename SuperClass::const_iterator const_iterator;
-  typedef typename SuperClass::size_type size_type;
+  using iterator = typename SuperClass::iterator;
+  using const_iterator = typename SuperClass::const_iterator;
+  using size_type = typename SuperClass::size_type;
 
 protected:
   // Default ctor - Initialize to empty.
@@ -845,8 +845,7 @@ class SmallVector : public SmallVectorImpl<T> {
   SmallVectorStorage<T, N> Storage;
 
 public:
-  SmallVector() : SmallVectorImpl<T>(N) {
-  }
+  SmallVector() : SmallVectorImpl<T>(N) {}
 
   explicit SmallVector(size_t Size, const T &Value = T())
     : SmallVectorImpl<T>(N) {
@@ -883,16 +882,16 @@ class SmallVector : public SmallVectorImpl<T> {
       SmallVectorImpl<T>::operator=(::std::move(RHS));
   }
 
-  const SmallVector &operator=(SmallVector &&RHS) {
-    SmallVectorImpl<T>::operator=(::std::move(RHS));
-    return *this;
-  }
-
   SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) {
     if (!RHS.empty())
       SmallVectorImpl<T>::operator=(::std::move(RHS));
   }
 
+  const SmallVector &operator=(SmallVector &&RHS) {
+    SmallVectorImpl<T>::operator=(::std::move(RHS));
+    return *this;
+  }
+
   const SmallVector &operator=(SmallVectorImpl<T> &&RHS) {
     SmallVectorImpl<T>::operator=(::std::move(RHS));
     return *this;
diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h
index a82cef6028f9..4cbf40c76805 100644
--- a/include/llvm/ADT/SparseBitVector.h
+++ b/include/llvm/ADT/SparseBitVector.h
@@ -1,4 +1,4 @@
-//===- llvm/ADT/SparseBitVector.h - Efficient Sparse BitVector -*- C++ -*- ===//
+//===- llvm/ADT/SparseBitVector.h - Efficient Sparse BitVector --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -41,8 +41,8 @@ namespace llvm {
 
 template <unsigned ElementSize = 128> struct SparseBitVectorElement {
 public:
-  typedef unsigned long BitWord;
-  typedef unsigned size_type;
+  using BitWord = unsigned long;
+  using size_type = unsigned;
   enum {
     BITWORD_SIZE = sizeof(BitWord) * CHAR_BIT,
     BITWORDS_PER_ELEMENT = (ElementSize + BITWORD_SIZE - 1) / BITWORD_SIZE,
@@ -100,7 +100,7 @@ template <unsigned ElementSize = 128> struct SparseBitVectorElement {
     Bits[Idx / BITWORD_SIZE] |= 1L << (Idx % BITWORD_SIZE);
   }
 
-  bool test_and_set (unsigned Idx) {
+  bool test_and_set(unsigned Idx) {
     bool old = test(Idx);
     if (!old) {
       set(Idx);
@@ -254,9 +254,9 @@ template <unsigned ElementSize = 128> struct SparseBitVectorElement {
 
 template <unsigned ElementSize = 128>
 class SparseBitVector {
-  typedef std::list<SparseBitVectorElement<ElementSize>> ElementList;
-  typedef typename ElementList::iterator ElementListIter;
-  typedef typename ElementList::const_iterator ElementListConstIter;
+  using ElementList = std::list<SparseBitVectorElement<ElementSize>>;
+  using ElementListIter = typename ElementList::iterator;
+  using ElementListConstIter = typename ElementList::const_iterator;
   enum {
     BITWORD_SIZE = SparseBitVectorElement<ElementSize>::BITWORD_SIZE
   };
@@ -421,14 +421,12 @@ class SparseBitVector {
   };
 
 public:
-  typedef SparseBitVectorIterator iterator;
+  using iterator = SparseBitVectorIterator;
 
   SparseBitVector() {
     CurrElementIter = Elements.begin();
   }
 
-  ~SparseBitVector() = default;
-
   // SparseBitVector copy ctor.
   SparseBitVector(const SparseBitVector &RHS) {
     ElementListConstIter ElementIter = RHS.Elements.begin();
@@ -440,6 +438,8 @@ class SparseBitVector {
     CurrElementIter = Elements.begin ();
   }
 
+  ~SparseBitVector() = default;
+
   // Clear.
   void clear() {
     Elements.clear();
diff --git a/include/llvm/ADT/SparseMultiSet.h b/include/llvm/ADT/SparseMultiSet.h
index 08da4b68ebaa..b3a413aa3aa5 100644
--- a/include/llvm/ADT/SparseMultiSet.h
+++ b/include/llvm/ADT/SparseMultiSet.h
@@ -1,4 +1,4 @@
-//===--- llvm/ADT/SparseMultiSet.h - Sparse multiset ------------*- C++ -*-===//
+//===- llvm/ADT/SparseMultiSet.h - Sparse multiset --------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -101,7 +101,7 @@ class SparseMultiSet {
     unsigned Prev;
     unsigned Next;
 
-    SMSNode(ValueT D, unsigned P, unsigned N) : Data(D), Prev(P), Next(N) { }
+    SMSNode(ValueT D, unsigned P, unsigned N) : Data(D), Prev(P), Next(N) {}
 
     /// List tails have invalid Nexts.
     bool isTail() const {
@@ -118,8 +118,8 @@ class SparseMultiSet {
     bool isValid() const { return Prev != INVALID; }
   };
 
-  typedef typename KeyFunctorT::argument_type KeyT;
-  typedef SmallVector<SMSNode, 8> DenseT;
+  using KeyT = typename KeyFunctorT::argument_type;
+  using DenseT = SmallVector<SMSNode, 8>;
   DenseT Dense;
   SparseT *Sparse = nullptr;
   unsigned Universe = 0;
@@ -183,12 +183,12 @@ class SparseMultiSet {
   }
 
 public:
-  typedef ValueT value_type;
-  typedef ValueT &reference;
-  typedef const ValueT &const_reference;
-  typedef ValueT *pointer;
-  typedef const ValueT *const_pointer;
-  typedef unsigned size_type;
+  using value_type = ValueT;
+  using reference = ValueT &;
+  using const_reference = const ValueT &;
+  using pointer = ValueT *;
+  using const_pointer = const ValueT *;
+  using size_type = unsigned;
 
   SparseMultiSet() = default;
   SparseMultiSet(const SparseMultiSet &) = delete;
@@ -227,7 +227,7 @@ class SparseMultiSet {
     unsigned SparseIdx;
 
     iterator_base(SMSPtrTy P, unsigned I, unsigned SI)
-      : SMS(P), Idx(I), SparseIdx(SI) { }
+      : SMS(P), Idx(I), SparseIdx(SI) {}
 
     /// Whether our iterator has fallen outside our dense vector.
     bool isEnd() const {
@@ -248,11 +248,11 @@ class SparseMultiSet {
     void setNext(unsigned N) { SMS->Dense[Idx].Next = N; }
 
   public:
-    typedef std::iterator<std::bidirectional_iterator_tag, ValueT> super;
-    typedef typename super::value_type value_type;
-    typedef typename super::difference_type difference_type;
-    typedef typename super::pointer pointer;
-    typedef typename super::reference reference;
+    using super = std::iterator<std::bidirectional_iterator_tag, ValueT>;
+    using value_type = typename super::value_type;
+    using difference_type = typename super::difference_type;
+    using pointer = typename super::pointer;
+    using reference = typename super::reference;
 
     reference operator*() const {
       assert(isKeyed() && SMS->sparseIndex(SMS->Dense[Idx].Data) == SparseIdx &&
@@ -308,11 +308,12 @@ class SparseMultiSet {
       return I;
     }
   };
-  typedef iterator_base<SparseMultiSet *> iterator;
-  typedef iterator_base<const SparseMultiSet *> const_iterator;
+
+  using iterator = iterator_base<SparseMultiSet *>;
+  using const_iterator = iterator_base<const SparseMultiSet *>;
 
   // Convenience types
-  typedef std::pair<iterator, iterator> RangePair;
+  using RangePair = std::pair<iterator, iterator>;
 
   /// Returns an iterator past this container. Note that such an iterator cannot
   /// be decremented, but will compare equal to other end iterators.
diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h
index 00c18c743219..25ade8831922 100644
--- a/include/llvm/ADT/SparseSet.h
+++ b/include/llvm/ADT/SparseSet.h
@@ -1,4 +1,4 @@
-//===--- llvm/ADT/SparseSet.h - Sparse set ----------------------*- C++ -*-===//
+//===- llvm/ADT/SparseSet.h - Sparse set ------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -125,9 +125,9 @@ class SparseSet {
                 !std::numeric_limits<SparseT>::is_signed,
                 "SparseT must be an unsigned integer type");
 
-  typedef typename KeyFunctorT::argument_type KeyT;
-  typedef SmallVector<ValueT, 8> DenseT;
-  typedef unsigned size_type;
+  using KeyT = typename KeyFunctorT::argument_type;
+  using DenseT = SmallVector<ValueT, 8>;
+  using size_type = unsigned;
   DenseT Dense;
   SparseT *Sparse = nullptr;
   unsigned Universe = 0;
@@ -135,11 +135,11 @@ class SparseSet {
   SparseSetValFunctor<KeyT, ValueT, KeyFunctorT> ValIndexOf;
 
 public:
-  typedef ValueT value_type;
-  typedef ValueT &reference;
-  typedef const ValueT &const_reference;
-  typedef ValueT *pointer;
-  typedef const ValueT *const_pointer;
+  using value_type = ValueT;
+  using reference = ValueT &;
+  using const_reference = const ValueT &;
+  using pointer = ValueT *;
+  using const_pointer = const ValueT *;
 
   SparseSet() = default;
   SparseSet(const SparseSet &) = delete;
@@ -168,8 +168,8 @@ class SparseSet {
   }
 
   // Import trivial vector stuff from DenseT.
-  typedef typename DenseT::iterator iterator;
-  typedef typename DenseT::const_iterator const_iterator;
+  using iterator = typename DenseT::iterator;
+  using const_iterator = typename DenseT::const_iterator;
 
   const_iterator begin() const { return Dense.begin(); }
   const_iterator end() const { return Dense.end(); }
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index 1c109be3bab3..e22a3f688c40 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -1,4 +1,4 @@
-//===-- llvm/ADT/StringExtras.h - Useful string functions -------*- C++ -*-===//
+//===- llvm/ADT/StringExtras.h - Useful string functions --------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,12 +15,18 @@
 #define LLVM_ADT_STRINGEXTRAS_H
 
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataTypes.h"
 #include <iterator>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <utility>
 
 namespace llvm {
-class raw_ostream;
+
 template<typename T> class SmallVectorImpl;
+class raw_ostream;
 
 /// hexdigit - Return the hexadecimal character for the
 /// given number \p X (which should be less than 16).
@@ -128,7 +134,6 @@ static inline std::string utostr(uint64_t X, bool isNeg = false) {
   return std::string(BufPtr, std::end(Buffer));
 }
 
-
 static inline std::string itostr(int64_t X) {
   if (X < 0)
     return utostr(static_cast<uint64_t>(-X), true);
@@ -261,13 +266,14 @@ template <typename A1, typename... Args>
 inline size_t join_items_size(const A1 &A, Args &&... Items) {
   return join_one_item_size(A) + join_items_size(std::forward<Args>(Items)...);
 }
-}
+
+} // end namespace detail
 
 /// Joins the strings in the range [Begin, End), adding Separator between
 /// the elements.
 template <typename IteratorT>
 inline std::string join(IteratorT Begin, IteratorT End, StringRef Separator) {
-  typedef typename std::iterator_traits<IteratorT>::iterator_category tag;
+  using tag = typename std::iterator_traits<IteratorT>::iterator_category;
   return detail::join_impl(Begin, End, Separator, tag());
 }
 
@@ -295,6 +301,6 @@ inline std::string join_items(Sep Separator, Args &&... Items) {
   return Result;
 }
 
-} // End llvm namespace
+} // end namespace llvm
 
-#endif
+#endif // LLVM_ADT_STRINGEXTRAS_H
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index c36fda7d6906..d573148665a1 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -1,4 +1,4 @@
-//===--- StringMap.h - String Hash table map interface ----------*- C++ -*-===//
+//===- StringMap.h - String Hash table map interface ------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,25 +16,23 @@
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
+#include <algorithm>
 #include <cassert>
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
 #include <initializer_list>
-#include <new>
+#include <iterator>
 #include <utility>
 
 namespace llvm {
 
-  template<typename ValueT>
-  class StringMapConstIterator;
-  template<typename ValueT>
-  class StringMapIterator;
-  template <typename ValueT> class StringMapKeyIterator;
-  template<typename ValueTy>
-  class StringMapEntry;
+template<typename ValueTy> class StringMapConstIterator;
+template<typename ValueTy> class StringMapIterator;
+template<typename ValueTy> class StringMapKeyIterator;
 
 /// StringMapEntryBase - Shared base class of StringMapEntry instances.
 class StringMapEntryBase {
@@ -53,17 +51,15 @@ class StringMapImpl {
   // Array of NumBuckets pointers to entries, null pointers are holes.
   // TheTable[NumBuckets] contains a sentinel value for easy iteration. Followed
   // by an array of the actual hash values as unsigned integers.
-  StringMapEntryBase **TheTable;
-  unsigned NumBuckets;
-  unsigned NumItems;
-  unsigned NumTombstones;
+  StringMapEntryBase **TheTable = nullptr;
+  unsigned NumBuckets = 0;
+  unsigned NumItems = 0;
+  unsigned NumTombstones = 0;
   unsigned ItemSize;
 
 protected:
   explicit StringMapImpl(unsigned itemSize)
-      : TheTable(nullptr),
-        // Initialize the map with zero buckets to allocation.
-        NumBuckets(0), NumItems(0), NumTombstones(0), ItemSize(itemSize) {}
+      : ItemSize(itemSize) {}
   StringMapImpl(StringMapImpl &&RHS)
       : TheTable(RHS.TheTable), NumBuckets(RHS.NumBuckets),
         NumItems(RHS.NumItems), NumTombstones(RHS.NumTombstones),
@@ -225,9 +221,10 @@ class StringMap : public StringMapImpl {
   AllocatorTy Allocator;
 
 public:
-  typedef StringMapEntry<ValueTy> MapEntryTy;
+  using MapEntryTy = StringMapEntry<ValueTy>;
 
   StringMap() : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))) {}
+
   explicit StringMap(unsigned InitialSize)
     : StringMapImpl(InitialSize, static_cast<unsigned>(sizeof(MapEntryTy))) {}
 
@@ -248,12 +245,6 @@ class StringMap : public StringMapImpl {
   StringMap(StringMap &&RHS)
       : StringMapImpl(std::move(RHS)), Allocator(std::move(RHS.Allocator)) {}
 
-  StringMap &operator=(StringMap RHS) {
-    StringMapImpl::swap(RHS);
-    std::swap(Allocator, RHS.Allocator);
-    return *this;
-  }
-
   StringMap(const StringMap &RHS) :
     StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))),
     Allocator(RHS.Allocator) {
@@ -289,16 +280,37 @@ class StringMap : public StringMapImpl {
     // not worthwhile.
   }
 
+  StringMap &operator=(StringMap RHS) {
+    StringMapImpl::swap(RHS);
+    std::swap(Allocator, RHS.Allocator);
+    return *this;
+  }
+
+  ~StringMap() {
+    // Delete all the elements in the map, but don't reset the elements
+    // to default values.  This is a copy of clear(), but avoids unnecessary
+    // work not required in the destructor.
+    if (!empty()) {
+      for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
+        StringMapEntryBase *Bucket = TheTable[I];
+        if (Bucket && Bucket != getTombstoneVal()) {
+          static_cast<MapEntryTy*>(Bucket)->Destroy(Allocator);
+        }
+      }
+    }
+    free(TheTable);
+  }
+
   AllocatorTy &getAllocator() { return Allocator; }
   const AllocatorTy &getAllocator() const { return Allocator; }
 
-  typedef const char* key_type;
-  typedef ValueTy mapped_type;
-  typedef StringMapEntry<ValueTy> value_type;
-  typedef size_t size_type;
+  using key_type = const char*;
+  using mapped_type = ValueTy;
+  using value_type = StringMapEntry<ValueTy>;
+  using size_type = size_t;
 
-  typedef StringMapConstIterator<ValueTy> const_iterator;
-  typedef StringMapIterator<ValueTy> iterator;
+  using const_iterator = StringMapConstIterator<ValueTy>;
+  using iterator = StringMapIterator<ValueTy>;
 
   iterator begin() {
     return iterator(TheTable, NumBuckets == 0);
@@ -313,7 +325,7 @@ class StringMap : public StringMapImpl {
     return const_iterator(TheTable+NumBuckets, true);
   }
 
-  llvm::iterator_range<StringMapKeyIterator<ValueTy>> keys() const {
+  iterator_range<StringMapKeyIterator<ValueTy>> keys() const {
     return make_range(StringMapKeyIterator<ValueTy>(begin()),
                       StringMapKeyIterator<ValueTy>(end()));
   }
@@ -433,21 +445,6 @@ class StringMap : public StringMapImpl {
     erase(I);
     return true;
   }
-
-  ~StringMap() {
-    // Delete all the elements in the map, but don't reset the elements
-    // to default values.  This is a copy of clear(), but avoids unnecessary
-    // work not required in the destructor.
-    if (!empty()) {
-      for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
-        StringMapEntryBase *Bucket = TheTable[I];
-        if (Bucket && Bucket != getTombstoneVal()) {
-          static_cast<MapEntryTy*>(Bucket)->Destroy(Allocator);
-        }
-      }
-    }
-    free(TheTable);
-  }
 };
 
 template <typename DerivedTy, typename ValueTy>
@@ -542,7 +539,6 @@ class StringMapKeyIterator
 
 public:
   StringMapKeyIterator() = default;
-
   explicit StringMapKeyIterator(StringMapConstIterator<ValueTy> Iter)
       : base(std::move(Iter)) {}
 
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index ce48f6d3bad3..4b25f56432df 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -1,4 +1,4 @@
-//===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===//
+//===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,16 +15,18 @@
 #include "llvm/Support/Compiler.h"
 #include <algorithm>
 #include <cassert>
+#include <cstddef>
 #include <cstring>
 #include <limits>
+#include <type_traits>
 #include <string>
 #include <utility>
 
 namespace llvm {
-  template <typename T>
-  class SmallVectorImpl;
+
   class APInt;
   class hash_code;
+  template <typename T> class SmallVectorImpl;
   class StringRef;
 
   /// Helper functions for StringRef::getAsInteger.
@@ -46,10 +48,11 @@ namespace llvm {
   /// general safe to store a StringRef.
   class StringRef {
   public:
-    typedef const char *iterator;
-    typedef const char *const_iterator;
     static const size_t npos = ~size_t(0);
-    typedef size_t size_type;
+
+    using iterator = const char *;
+    using const_iterator = const char *;
+    using size_type = size_t;
 
   private:
     /// The start of the string, in an external buffer.
@@ -906,6 +909,7 @@ namespace llvm {
   // StringRefs can be treated like a POD type.
   template <typename T> struct isPodLike;
   template <> struct isPodLike<StringRef> { static const bool value = true; };
-}
 
-#endif
+} // end namespace llvm
+
+#endif // LLVM_ADT_STRINGREF_H
diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h
index c32c2a497438..9af44c07df79 100644
--- a/include/llvm/ADT/StringSet.h
+++ b/include/llvm/ADT/StringSet.h
@@ -1,4 +1,4 @@
-//===--- StringSet.h - The LLVM Compiler Driver -----------------*- C++ -*-===//
+//===- StringSet.h - The LLVM Compiler Driver -------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,13 +15,19 @@
 #define LLVM_ADT_STRINGSET_H
 
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include <cassert>
+#include <initializer_list>
+#include <utility>
 
 namespace llvm {
 
   /// StringSet - A wrapper for StringMap that provides set-like functionality.
-  template <class AllocatorTy = llvm::MallocAllocator>
-  class StringSet : public llvm::StringMap<char, AllocatorTy> {
-    typedef llvm::StringMap<char, AllocatorTy> base;
+  template <class AllocatorTy = MallocAllocator>
+  class StringSet : public StringMap<char, AllocatorTy> {
+    using base = StringMap<char, AllocatorTy>;
+
   public:
     StringSet() = default;
     StringSet(std::initializer_list<StringRef> S) {
@@ -40,6 +46,7 @@ namespace llvm {
         base::insert(std::make_pair(*It, '\0'));
     }
   };
-}
+
+} // end namespace llvm
 
 #endif // LLVM_ADT_STRINGSET_H
diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h
index ca43b6046193..79740713f75b 100644
--- a/include/llvm/ADT/TinyPtrVector.h
+++ b/include/llvm/ADT/TinyPtrVector.h
@@ -30,9 +30,9 @@ namespace llvm {
 template <typename EltTy>
 class TinyPtrVector {
 public:
-  typedef SmallVector<EltTy, 4> VecTy;
-  typedef typename VecTy::value_type value_type;
-  typedef PointerUnion<EltTy, VecTy *> PtrUnion;
+  using VecTy = SmallVector<EltTy, 4>;
+  using value_type = typename VecTy::value_type;
+  using PtrUnion = PointerUnion<EltTy, VecTy *>;
 
 private:
   PtrUnion Val;
@@ -167,10 +167,10 @@ class TinyPtrVector {
     return Val.template get<VecTy*>()->size();
   }
 
-  typedef EltTy *iterator;
-  typedef const EltTy *const_iterator;
-  typedef std::reverse_iterator<iterator> reverse_iterator;
-  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  using iterator = EltTy *;
+  using const_iterator = const EltTy *;
+  using reverse_iterator = std::reverse_iterator<iterator>;
+  using const_reverse_iterator = std::reverse_iterator<const_iterator>;
 
   iterator begin() {
     if (Val.template is<EltTy>())
diff --git a/include/llvm/ADT/UniqueVector.h b/include/llvm/ADT/UniqueVector.h
index e1ab4b56023f..b17fb2392baf 100644
--- a/include/llvm/ADT/UniqueVector.h
+++ b/include/llvm/ADT/UniqueVector.h
@@ -1,4 +1,4 @@
-//===-- llvm/ADT/UniqueVector.h ---------------------------------*- C++ -*-===//
+//===- llvm/ADT/UniqueVector.h ----------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -24,16 +24,15 @@ namespace llvm {
 /// Entries can be fetched using operator[] with the entry ID.
 template<class T> class UniqueVector {
 public:
-  typedef typename std::vector<T> VectorType;
-  typedef typename VectorType::iterator iterator;
-  typedef typename VectorType::const_iterator const_iterator;
+  using VectorType = typename std::vector<T>;
+  using iterator = typename VectorType::iterator;
+  using const_iterator = typename VectorType::const_iterator;
 
 private:
   // Map - Used to handle the correspondence of entry to ID.
   std::map<T, unsigned> Map;
 
   // Vector - ID ordered vector of entries. Entries can be indexed by ID - 1.
-  //
   VectorType Vector;
 
 public:
@@ -68,7 +67,6 @@ template<class T> class UniqueVector {
   }
 
   /// operator[] - Returns a reference to the entry with the specified ID.
-  ///
   const T &operator[](unsigned ID) const {
     assert(ID-1 < size() && "ID is 0 or out of range!");
     return Vector[ID - 1];
@@ -87,21 +85,18 @@ template<class T> class UniqueVector {
   const_iterator end() const { return Vector.end(); }
 
   /// size - Returns the number of entries in the vector.
-  ///
   size_t size() const { return Vector.size(); }
 
   /// empty - Returns true if the vector is empty.
-  ///
   bool empty() const { return Vector.empty(); }
 
   /// reset - Clears all the entries.
-  ///
   void reset() {
     Map.clear();
     Vector.resize(0, 0);
   }
 };
 
-} // End of namespace llvm
+} // end namespace llvm
 
 #endif // LLVM_ADT_UNIQUEVECTOR_H
diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h
index c5f97083af4d..6aaabe1d1889 100644
--- a/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -55,6 +55,21 @@ class ProfileSummaryInfo {
   ProfileSummaryInfo(ProfileSummaryInfo &&Arg)
       : M(Arg.M), Summary(std::move(Arg.Summary)) {}
 
+  /// \brief Returns true if profile summary is available.
+  bool hasProfileSummary() { return computeSummary(); }
+
+  /// \brief Returns true if module \c M has sample profile.
+  bool hasSampleProfile() {
+    return hasProfileSummary() &&
+           Summary->getKind() == ProfileSummary::PSK_Sample;
+  }
+
+  /// \brief Returns true if module \c M has instrumentation profile.
+  bool hasInstrumentationProfile() {
+    return hasProfileSummary() &&
+           Summary->getKind() == ProfileSummary::PSK_Instr;
+  }
+
   /// Handle the invalidation of this information.
   ///
   /// When used as a result of \c ProfileSummaryAnalysis this method will be
diff --git a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
index f3122f0bf7f0..6d9f345755ab 100644
--- a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
@@ -28,7 +28,7 @@ class CVTypeVisitor {
 
   Error visitTypeRecord(CVType &Record, TypeIndex Index);
   Error visitTypeRecord(CVType &Record);
-  Error visitMemberRecord(CVMemberRecord &Record);
+  Error visitMemberRecord(CVMemberRecord Record);
 
   /// Visits the type records in Data. Sets the error flag on parse failures.
   Error visitTypeStream(const CVTypeArray &Types);
@@ -47,6 +47,36 @@ class CVTypeVisitor {
   TinyPtrVector<TypeServerHandler *> Handlers;
 };
 
+enum VisitorDataSource {
+  VDS_BytesPresent, // The record bytes are passed into the visitation
+                    // function.  The algorithm should first deserialize them
+                    // before passing them on through the pipeline.
+  VDS_BytesExternal // The record bytes are not present, and it is the
+                    // responsibility of the visitor callback interface to
+                    // supply the bytes.
+};
+
+Error visitTypeRecord(CVType &Record, TypeIndex Index,
+                      TypeVisitorCallbacks &Callbacks,
+                      VisitorDataSource Source = VDS_BytesPresent,
+                      TypeServerHandler *TS = nullptr);
+Error visitTypeRecord(CVType &Record, TypeVisitorCallbacks &Callbacks,
+                      VisitorDataSource Source = VDS_BytesPresent,
+                      TypeServerHandler *TS = nullptr);
+
+Error visitMemberRecord(CVMemberRecord Record, TypeVisitorCallbacks &Callbacks,
+                        VisitorDataSource Source = VDS_BytesPresent);
+Error visitMemberRecord(TypeLeafKind Kind, ArrayRef<uint8_t> Record,
+                        TypeVisitorCallbacks &Callbacks);
+
+Error visitMemberRecordStream(ArrayRef<uint8_t> FieldList,
+                              TypeVisitorCallbacks &Callbacks);
+
+Error visitTypeStream(const CVTypeArray &Types, TypeVisitorCallbacks &Callbacks,
+                      TypeServerHandler *TS = nullptr);
+Error visitTypeStream(CVTypeRange Types, TypeVisitorCallbacks &Callbacks,
+                      TypeServerHandler *TS = nullptr);
+
 } // end namespace codeview
 } // end namespace llvm
 
diff --git a/include/llvm/DebugInfo/CodeView/RandomAccessTypeVisitor.h b/include/llvm/DebugInfo/CodeView/RandomAccessTypeVisitor.h
index 35a8010f1163..21288df89be2 100644
--- a/include/llvm/DebugInfo/CodeView/RandomAccessTypeVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/RandomAccessTypeVisitor.h
@@ -11,13 +11,10 @@
 #define LLVM_DEBUGINFO_CODEVIEW_RANDOMACCESSTYPEVISITOR_H
 
 #include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/TypeDatabase.h"
 #include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
 #include "llvm/Support/Error.h"
 
 namespace llvm {
@@ -73,18 +70,6 @@ class RandomAccessTypeVisitor {
   /// The database visitor which adds new records to the database.
   TypeDatabaseVisitor DatabaseVisitor;
 
-  /// The deserializer which deserializes new records.
-  TypeDeserializer Deserializer;
-
-  /// The visitation callback pipeline to use.  By default this contains a
-  /// deserializer and a type database visitor.  But the callback specified
-  /// in the constructor is also added.
-  TypeVisitorCallbackPipeline Pipeline;
-
-  /// The visitor used to visit the internal pipeline for deserialization and
-  /// database maintenance.
-  CVTypeVisitor InternalVisitor;
-
   /// A vector mapping type indices to type offset.  For every record that has
   /// been visited, contains the absolute offset of that record in the record
   /// array.
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
index 5919aaddea40..c3953b62d780 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
@@ -31,10 +31,10 @@ struct DWARFAttribute {
   dwarf::Attribute Attr;
   /// The form and value for this attribute.
   DWARFFormValue Value;
-  
+
   DWARFAttribute(uint32_t O, dwarf::Attribute A = dwarf::Attribute(0),
                  dwarf::Form F = dwarf::Form(0)) : Attr(A), Value(F) {}
-  
+
   bool isValid() const {
     return Offset != 0 && Attr != dwarf::Attribute(0);
   }
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
index 40eb7e9a8836..2d82104ea098 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
@@ -22,19 +22,19 @@ class raw_ostream;
 class DWARFDebugArangeSet {
 public:
   struct Header {
-    // The total length of the entries for that set, not including the length
-    // field itself.
+    /// The total length of the entries for that set, not including the length
+    /// field itself.
     uint32_t Length;
-    // The offset from the beginning of the .debug_info section of the
-    // compilation unit entry referenced by the table.
+    /// The offset from the beginning of the .debug_info section of the
+    /// compilation unit entry referenced by the table.
     uint32_t CuOffset;
-    // The DWARF version number.
+    /// The DWARF version number.
     uint16_t Version;
-    // The size in bytes of an address on the target architecture. For segmented
-    // addressing, this is the size of the offset portion of the address.
+    /// The size in bytes of an address on the target architecture. For segmented
+    /// addressing, this is the size of the offset portion of the address.
     uint8_t AddrSize;
-    // The size in bytes of a segment descriptor on the target architecture.
-    // If the target system uses a flat address space, this value is 0.
+    /// The size in bytes of a segment descriptor on the target architecture.
+    /// If the target system uses a flat address space, this value is 0.
     uint8_t SegSize;
   };
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
index c06771d6afb4..2237aa361d18 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
@@ -28,7 +28,7 @@ class DWARFDebugAranges {
   void clear();
   void extract(DataExtractor DebugArangesData);
 
-  // Call appendRange multiple times and then call construct.
+  /// Call appendRange multiple times and then call construct.
   void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC);
   void construct();
 
@@ -58,9 +58,9 @@ class DWARFDebugAranges {
       return LowPC < other.LowPC;
     }
 
-    uint64_t LowPC; // Start of address range.
-    uint32_t Length; // End of address range (not including this address).
-    uint32_t CUOffset; // Offset of the compile unit or die.
+    uint64_t LowPC; ///< Start of address range.
+    uint32_t Length; ///< End of address range (not including this address).
+    uint32_t CUOffset; ///< Offset of the compile unit or die.
   };
 
   struct RangeEndpoint {
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
index 23a573b7a9fa..95ec1be62a79 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
@@ -33,31 +33,31 @@ typedef std::vector<DWARFAddressRange> DWARFAddressRangesVector;
 class DWARFDebugRangeList {
 public:
   struct RangeListEntry {
-    // A beginning address offset. This address offset has the size of an
-    // address and is relative to the applicable base address of the
-    // compilation unit referencing this range list. It marks the beginning
-    // of an address range.
+    /// A beginning address offset. This address offset has the size of an
+    /// address and is relative to the applicable base address of the
+    /// compilation unit referencing this range list. It marks the beginning
+    /// of an address range.
     uint64_t StartAddress;
-    // An ending address offset. This address offset again has the size of
-    // an address and is relative to the applicable base address of the
-    // compilation unit referencing this range list. It marks the first
-    // address past the end of the address range. The ending address must
-    // be greater than or equal to the beginning address.
+    /// An ending address offset. This address offset again has the size of
+    /// an address and is relative to the applicable base address of the
+    /// compilation unit referencing this range list. It marks the first
+    /// address past the end of the address range. The ending address must
+    /// be greater than or equal to the beginning address.
     uint64_t EndAddress;
 
-    // The end of any given range list is marked by an end of list entry,
-    // which consists of a 0 for the beginning address offset
-    // and a 0 for the ending address offset.
+    /// The end of any given range list is marked by an end of list entry,
+    /// which consists of a 0 for the beginning address offset
+    /// and a 0 for the ending address offset.
     bool isEndOfListEntry() const {
       return (StartAddress == 0) && (EndAddress == 0);
     }
 
-    // A base address selection entry consists of:
-    // 1. The value of the largest representable address offset
-    // (for example, 0xffffffff when the size of an address is 32 bits).
-    // 2. An address, which defines the appropriate base address for
-    // use in interpreting the beginning and ending address offsets of
-    // subsequent entries of the location list.
+    /// A base address selection entry consists of:
+    /// 1. The value of the largest representable address offset
+    /// (for example, 0xffffffff when the size of an address is 32 bits).
+    /// 2. An address, which defines the appropriate base address for
+    /// use in interpreting the beginning and ending address offsets of
+    /// subsequent entries of the location list.
     bool isBaseAddressSelectionEntry(uint8_t AddressSize) const {
       assert(AddressSize == 4 || AddressSize == 8);
       if (AddressSize == 4)
@@ -68,7 +68,7 @@ class DWARFDebugRangeList {
   };
 
 private:
-  // Offset in .debug_ranges section.
+  /// Offset in .debug_ranges section.
   uint32_t Offset;
   uint8_t AddressSize;
   std::vector<RangeListEntry> Entries;
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDie.h b/include/llvm/DebugInfo/DWARF/DWARFDie.h
index ee06125ea278..ca94a90fabfc 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -24,10 +24,10 @@
 #include <iterator>
 
 namespace llvm {
-    
+
 class DWARFUnit;
 class raw_ostream;
-  
+
 //===----------------------------------------------------------------------===//
 /// Utility class that carries the DWARF compile/type unit and the debug info
 /// entry in an object.
@@ -47,7 +47,7 @@ class DWARFDie {
 public:
   DWARFDie() = default;
   DWARFDie(DWARFUnit *Unit, const DWARFDebugInfoEntry * D) : U(Unit), Die(D) {}
-  
+
   bool isValid() const { return U && Die; }
   explicit operator bool() const { return isValid(); }
   const DWARFDebugInfoEntry *getDebugInfoEntry() const { return Die; }
@@ -68,7 +68,7 @@ class DWARFDie {
     assert(isValid() && "must check validity prior to calling");
     return Die->getOffset();
   }
-  
+
   dwarf::Tag getTag() const {
     auto AbbrevDecl = getAbbreviationDeclarationPtr();
     if (AbbrevDecl)
@@ -80,7 +80,7 @@ class DWARFDie {
     assert(isValid() && "must check validity prior to calling");
     return Die->hasChildren();
   }
-  
+
   /// Returns true for a valid DIE that terminates a sibling chain.
   bool isNULL() const {
     return getAbbreviationDeclarationPtr() == nullptr;
@@ -97,13 +97,13 @@ class DWARFDie {
   /// \returns a valid DWARFDie instance if this object has a parent or an
   /// invalid DWARFDie instance if it doesn't.
   DWARFDie getParent() const;
-  
+
   /// Get the sibling of this DIE object.
   ///
   /// \returns a valid DWARFDie instance if this object has a sibling or an
   /// invalid DWARFDie instance if it doesn't.
   DWARFDie getSibling() const;
-  
+
   /// Get the first child of this DIE object.
   ///
   /// \returns a valid DWARFDie instance if this object has children or an
@@ -113,7 +113,7 @@ class DWARFDie {
       return DWARFDie(U, Die + 1);
     return DWARFDie();
   }
-  
+
   /// Dump the DIE and all of its attributes to the supplied stream.
   ///
   /// \param OS the stream to use for output.
@@ -121,7 +121,7 @@ class DWARFDie {
   /// children.
   /// \param indent the number of characters to indent each line that is output.
   void dump(raw_ostream &OS, unsigned recurseDepth, unsigned indent = 0) const;
-  
+
   /// Extract the specified attribute from this DIE.
   ///
   /// Extract an attribute value from this DIE only. This call doesn't look
@@ -132,7 +132,7 @@ class DWARFDie {
   /// \returns an optional DWARFFormValue that will have the form value if the
   /// attribute was successfully extracted.
   Optional<DWARFFormValue> find(dwarf::Attribute Attr) const;
-  
+
   /// Extract the first value of any attribute in Attrs from this DIE.
   ///
   /// Extract the first attribute that matches from this DIE only. This call
@@ -180,7 +180,7 @@ class DWARFDie {
   ///
   /// \returns anm optional absolute section offset value for the attribute.
   Optional<uint64_t> getRangesBaseAttribute() const;
-  
+
   /// Get the DW_AT_high_pc attribute value as an address.
   ///
   /// In DWARF version 4 and later the high PC can be encoded as an offset from
@@ -196,7 +196,7 @@ class DWARFDie {
   /// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU.
   /// Returns true if both attributes are present.
   bool getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC) const;
-  
+
   /// Get the address ranges for this DIE.
   ///
   /// Get the hi/low PC range if both attributes are available or exrtracts the
@@ -208,7 +208,7 @@ class DWARFDie {
   /// \returns a address range vector that might be empty if no address range
   /// information is available.
   DWARFAddressRangesVector getAddressRanges() const;
-  
+
   /// Get all address ranges for any DW_TAG_subprogram DIEs in this DIE or any
   /// of its children.
   ///
@@ -218,19 +218,19 @@ class DWARFDie {
   ///
   /// \param Ranges the addres range vector to fill in.
   void collectChildrenAddressRanges(DWARFAddressRangesVector &Ranges) const;
-  
+
   bool addressRangeContainsAddress(const uint64_t Address) const;
-  
+
   /// If a DIE represents a subprogram (or inlined subroutine), returns its
   /// mangled name (or short name, if mangled is missing). This name may be
   /// fetched from specification or abstract origin for this subprogram.
   /// Returns null if no name is found.
   const char *getSubroutineName(DINameKind Kind) const;
-  
+
   /// Return the DIE name resolving DW_AT_sepcification or DW_AT_abstract_origin
   /// references if necessary. Returns null if no name is found.
   const char *getName(DINameKind Kind) const;
-  
+
   /// Returns the declaration line (start line) for a DIE, assuming it specifies
   /// a subprogram. This may be fetched from specification or abstract origin
   /// for this subprogram by resolving DW_AT_sepcification or
@@ -251,21 +251,21 @@ class DWARFDie {
   /// there is no DW_AT_GNU_discriminator attribute in this DIE.
   void getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
                       uint32_t &CallColumn, uint32_t &CallDiscriminator) const;
-  
+
   class attribute_iterator;
 
   /// Get an iterator range to all attributes in the current DIE only.
   ///
   /// \returns an iterator range for the attributes of the current DIE.
   iterator_range<attribute_iterator> attributes() const;
-  
+
   class iterator;
-  
+
   iterator begin() const;
   iterator end() const;
   iterator_range<iterator> children() const;
 };
-  
+
 class DWARFDie::attribute_iterator :
     public iterator_facade_base<attribute_iterator, std::forward_iterator_tag,
                                 const DWARFAttribute> {
@@ -275,7 +275,7 @@ class DWARFDie::attribute_iterator :
   DWARFAttribute AttrValue;
   /// The attribute index within the abbreviation declaration in Die.
   uint32_t Index;
-  
+
   /// Update the attribute index and attempt to read the attribute value. If the
   /// attribute is able to be read, update AttrValue and the Index member
   /// variable. If the attribute value is not able to be read, an appropriate
diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index f3516ebdecba..a30e0be9c3c3 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -49,9 +49,9 @@ class DWARFFormValue {
     const uint8_t *data = nullptr;
   };
 
-  dwarf::Form Form;             // Form for this value.
-  ValueType Value;              // Contains all data for the form.
-  const DWARFUnit *U = nullptr; // Remember the DWARFUnit at extract time.
+  dwarf::Form Form;             ///< Form for this value.
+  ValueType Value;              ///< Contains all data for the form.
+  const DWARFUnit *U = nullptr; ///< Remember the DWARFUnit at extract time.
 
 public:
   DWARFFormValue(dwarf::Form F = dwarf::Form(0)) : Form(F) {}
@@ -72,11 +72,14 @@ class DWARFFormValue {
   const DWARFUnit *getUnit() const { return U; }
   void dump(raw_ostream &OS) const;
 
-  /// \brief extracts a value in data at offset *offset_ptr.
+  /// Extracts a value in \p Data at offset \p *OffsetPtr.
   ///
   /// The passed DWARFUnit is allowed to be nullptr, in which
   /// case no relocation processing will be performed and some
   /// kind of forms that depend on Unit information are disallowed.
+  /// \param Data The DataExtractor to use.
+  /// \param OffsetPtr The offset within DataExtractor where the data starts.
+  /// \param U The optional DWARFUnit supplying information for some forms.
   /// \returns whether the extraction succeeded.
   bool extractValue(const DataExtractor &Data, uint32_t *OffsetPtr,
                     const DWARFUnit *U);
diff --git a/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h b/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
index 7a52218663b9..8d1ac5c83c23 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
@@ -29,25 +29,25 @@ class DWARFGdbIndex {
   uint32_t ConstantPoolOffset;
 
   struct CompUnitEntry {
-    uint64_t Offset; // Offset of a CU in the .debug_info section.
-    uint64_t Length; // Length of that CU.
+    uint64_t Offset; ///< Offset of a CU in the .debug_info section.
+    uint64_t Length; ///< Length of that CU.
   };
   SmallVector<CompUnitEntry, 0> CuList;
 
   struct AddressEntry {
-    uint64_t LowAddress;  // The low address.
-    uint64_t HighAddress; // The high address.
-    uint32_t CuIndex;     // The CU index.
+    uint64_t LowAddress;  ///< The low address.
+    uint64_t HighAddress; ///< The high address.
+    uint32_t CuIndex;     ///< The CU index.
   };
   SmallVector<AddressEntry, 0> AddressArea;
 
   struct SymTableEntry {
-    uint32_t NameOffset; // Offset of the symbol's name in the constant pool.
-    uint32_t VecOffset;  // Offset of the CU vector in the constant pool.
+    uint32_t NameOffset; ///< Offset of the symbol's name in the constant pool.
+    uint32_t VecOffset;  ///< Offset of the CU vector in the constant pool.
   };
   SmallVector<SymTableEntry, 0> SymbolTable;
 
-  // Each value is CU index + attributes.
+  /// Each value is CU index + attributes.
   SmallVector<std::pair<uint32_t, SmallVector<uint32_t, 0>>, 0>
       ConstantPoolVectors;
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h b/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
index f1e03bb4c2e1..ec0397a0fb09 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
@@ -17,15 +17,14 @@
 namespace llvm {
 
 struct RelocAddrEntry {
-  uint8_t Width;
   int64_t Value;
 };
 
-// In place of applying the relocations to the data we've read from disk we use
-// a separate mapping table to the side and checking that at locations in the
-// dwarf where we expect relocated values. This adds a bit of complexity to the
-// dwarf parsing/extraction at the benefit of not allocating memory for the
-// entire size of the debug info sections.
+/// In place of applying the relocations to the data we've read from disk we use
+/// a separate mapping table to the side and checking that at locations in the
+/// dwarf where we expect relocated values. This adds a bit of complexity to the
+/// dwarf parsing/extraction at the benefit of not allocating memory for the
+/// entire size of the debug info sections.
 typedef DenseMap<uint64_t, RelocAddrEntry> RelocAddrMap;
 
 } // end namespace llvm
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 68e541bac73c..c15e27f36a8b 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -111,7 +111,7 @@ class DWARFUnitSection final : public SmallVector<std::unique_ptr<UnitType>, 1>,
 
 class DWARFUnit {
   DWARFContext &Context;
-  // Section containing this DWARFUnit.
+  /// Section containing this DWARFUnit.
   const DWARFSection &InfoSection;
 
   const DWARFDebugAbbrev *Abbrev;
@@ -133,12 +133,12 @@ class DWARFUnit {
   uint8_t UnitType;
   uint8_t AddrSize;
   uint64_t BaseAddr;
-  // The compile unit debug information entry items.
+  /// The compile unit debug information entry items.
   std::vector<DWARFDebugInfoEntry> DieArray;
 
-  // Map from range's start address to end address and corresponding DIE.
-  // IntervalMap does not support range removal, as a result, we use the
-  // std::map::upper_bound for address range lookup.
+  /// Map from range's start address to end address and corresponding DIE.
+  /// IntervalMap does not support range removal, as a result, we use the
+  /// std::map::upper_bound for address range lookup.
   std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap;
   typedef iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>
       die_iterator_range;
@@ -189,7 +189,7 @@ class DWARFUnit {
     AddrOffsetSectionBase = Base;
   }
 
-  // Recursively update address to Die map.
+  /// Recursively update address to Die map.
   void updateAddressDieMap(DWARFDie Die);
 
   void setRangesSection(const DWARFSection *RS, uint32_t Base) {
diff --git a/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/include/llvm/DebugInfo/PDB/Native/TpiStream.h
index 4579cbf4227b..c5549983ed43 100644
--- a/include/llvm/DebugInfo/PDB/Native/TpiStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/TpiStream.h
@@ -51,6 +51,7 @@ class TpiStream {
   HashTable &getHashAdjusters();
 
   codeview::CVTypeRange types(bool *HadError) const;
+  const codeview::CVTypeArray &typeArray() const { return TypeRecords; }
 
   Error commit();
 
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
index 64240a929782..6321bb81b8cb 100644
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1132,4 +1132,6 @@ def int_ppc_tsuspend : GCCBuiltin<"__builtin_tsuspend">,
 
 def int_ppc_ttest : GCCBuiltin<"__builtin_ttest">,
       Intrinsic<[llvm_i64_ty], [], []>;
+
+def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>;
 }
diff --git a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index a06c67fe814c..071ec2edb538 100644
--- a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -62,6 +62,7 @@ def : GINodeEquiv<G_FMUL, fmul>;
 def : GINodeEquiv<G_FDIV, fdiv>;
 def : GINodeEquiv<G_FREM, frem>;
 def : GINodeEquiv<G_FPOW, fpow>;
+def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain>;
 def : GINodeEquiv<G_BR, br>;
 
 // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern.
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index a4672efeedd6..e4d58bf1b4eb 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -2984,7 +2984,7 @@ bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
                                SmallVectorImpl<Constraint> &Constraints,
                                bool &Consistent) {
   bool Result = false;
-  for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) {
+  for (unsigned LI : Loops.set_bits()) {
     DEBUG(dbgs() << "\t    Constraint[" << LI << "] is");
     DEBUG(Constraints[LI].dump(dbgs()));
     if (Constraints[LI].isDistance())
@@ -3266,7 +3266,7 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
 // For debugging purposes, dump a small bit vector to dbgs().
 static void dumpSmallBitVector(SmallBitVector &BV) {
   dbgs() << "{";
-  for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) {
+  for (unsigned VI : BV.set_bits()) {
     dbgs() << VI;
     if (BV.find_next(VI) >= 0)
       dbgs() << ' ';
@@ -3506,7 +3506,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
   NewConstraint.setAny(SE);
 
   // test separable subscripts
-  for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+  for (unsigned SI : Separable.set_bits()) {
     DEBUG(dbgs() << "testing subscript " << SI);
     switch (Pair[SI].Classification) {
     case Subscript::ZIV:
@@ -3545,14 +3545,14 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
     SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
     for (unsigned II = 0; II <= MaxLevels; ++II)
       Constraints[II].setAny(SE);
-    for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+    for (unsigned SI : Coupled.set_bits()) {
       DEBUG(dbgs() << "testing subscript group " << SI << " { ");
       SmallBitVector Group(Pair[SI].Group);
       SmallBitVector Sivs(Pairs);
       SmallBitVector Mivs(Pairs);
       SmallBitVector ConstrainedLevels(MaxLevels + 1);
       SmallVector<Subscript *, 4> PairsInGroup;
-      for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+      for (unsigned SJ : Group.set_bits()) {
         DEBUG(dbgs() << SJ << " ");
         if (Pair[SJ].Classification == Subscript::SIV)
           Sivs.set(SJ);
@@ -3564,7 +3564,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
       DEBUG(dbgs() << "}\n");
       while (Sivs.any()) {
         bool Changed = false;
-        for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+        for (unsigned SJ : Sivs.set_bits()) {
           DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
           // SJ is an SIV subscript that's part of the current coupled group
           unsigned Level;
@@ -3588,7 +3588,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
           DEBUG(dbgs() << "    propagating\n");
           DEBUG(dbgs() << "\tMivs = ");
           DEBUG(dumpSmallBitVector(Mivs));
-          for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+          for (unsigned SJ : Mivs.set_bits()) {
             // SJ is an MIV subscript that's part of the current coupled group
             DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
             if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
@@ -3622,7 +3622,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
       }
 
       // test & propagate remaining RDIVs
-      for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+      for (unsigned SJ : Mivs.set_bits()) {
         if (Pair[SJ].Classification == Subscript::RDIV) {
           DEBUG(dbgs() << "RDIV test\n");
           if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
@@ -3635,7 +3635,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
       // test remaining MIVs
       // This code is temporary.
       // Better to somehow test all remaining subscripts simultaneously.
-      for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+      for (unsigned SJ : Mivs.set_bits()) {
         if (Pair[SJ].Classification == Subscript::MIV) {
           DEBUG(dbgs() << "MIV test\n");
           if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
@@ -3647,9 +3647,8 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
 
       // update Result.DV from constraint vector
       DEBUG(dbgs() << "    updating\n");
-      for (int SJ = ConstrainedLevels.find_first(); SJ >= 0;
-           SJ = ConstrainedLevels.find_next(SJ)) {
-        if (SJ > (int)CommonLevels)
+      for (unsigned SJ : ConstrainedLevels.set_bits()) {
+        if (SJ > CommonLevels)
           break;
         updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
         if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
@@ -3859,7 +3858,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
   NewConstraint.setAny(SE);
 
   // test separable subscripts
-  for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+  for (unsigned SI : Separable.set_bits()) {
     switch (Pair[SI].Classification) {
     case Subscript::SIV: {
       unsigned Level;
@@ -3886,12 +3885,12 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
     SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
     for (unsigned II = 0; II <= MaxLevels; ++II)
       Constraints[II].setAny(SE);
-    for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+    for (unsigned SI : Coupled.set_bits()) {
       SmallBitVector Group(Pair[SI].Group);
       SmallBitVector Sivs(Pairs);
       SmallBitVector Mivs(Pairs);
       SmallBitVector ConstrainedLevels(MaxLevels + 1);
-      for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+      for (unsigned SJ : Group.set_bits()) {
         if (Pair[SJ].Classification == Subscript::SIV)
           Sivs.set(SJ);
         else
@@ -3899,7 +3898,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
       }
       while (Sivs.any()) {
         bool Changed = false;
-        for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+        for (unsigned SJ : Sivs.set_bits()) {
           // SJ is an SIV subscript that's part of the current coupled group
           unsigned Level;
           const SCEV *SplitIter = nullptr;
@@ -3914,7 +3913,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
         }
         if (Changed) {
           // propagate, possibly creating new SIVs and ZIVs
-          for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+          for (unsigned SJ : Mivs.set_bits()) {
             // SJ is an MIV subscript that's part of the current coupled group
             if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
                           Pair[SJ].Loops, Constraints, Result.Consistent)) {
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 44c14cb17c22..4702569126c6 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -669,21 +669,33 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
       Threshold = MaxIfValid(Threshold, Params.HintThreshold);
     if (PSI) {
       BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
-      if (PSI->isHotCallSite(CS, CallerBFI)) {
-        DEBUG(dbgs() << "Hot callsite.\n");
-        Threshold = Params.HotCallSiteThreshold.getValue();
-      } else if (PSI->isFunctionEntryHot(&Callee)) {
-        DEBUG(dbgs() << "Hot callee.\n");
-        // If callsite hotness can not be determined, we may still know
-        // that the callee is hot and treat it as a weaker hint for threshold
-        // increase.
-        Threshold = MaxIfValid(Threshold, Params.HintThreshold);
-      } else if (PSI->isColdCallSite(CS, CallerBFI)) {
-        DEBUG(dbgs() << "Cold callsite.\n");
-        Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
-      } else if (PSI->isFunctionEntryCold(&Callee)) {
-        DEBUG(dbgs() << "Cold callee.\n");
-        Threshold = MinIfValid(Threshold, Params.ColdThreshold);
+      // FIXME: After switching to the new passmanager, simplify the logic below
+      // by checking only the callsite hotness/coldness. The check for CallerBFI
+      // exists only because we do not have BFI available with the old PM.
+      //
+      // Use callee's hotness information only if we have no way of determining
+      // callsite's hotness information. Callsite hotness can be determined if
+      // sample profile is used (which adds hotness metadata to calls) or if
+      // caller's BlockFrequencyInfo is available.
+      if (CallerBFI || PSI->hasSampleProfile()) {
+        if (PSI->isHotCallSite(CS, CallerBFI)) {
+          DEBUG(dbgs() << "Hot callsite.\n");
+          Threshold = Params.HotCallSiteThreshold.getValue();
+        } else if (PSI->isColdCallSite(CS, CallerBFI)) {
+          DEBUG(dbgs() << "Cold callsite.\n");
+          Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
+        }
+      } else {
+        if (PSI->isFunctionEntryHot(&Callee)) {
+          DEBUG(dbgs() << "Hot callee.\n");
+          // If callsite hotness can not be determined, we may still know
+          // that the callee is hot and treat it as a weaker hint for threshold
+          // increase.
+          Threshold = MaxIfValid(Threshold, Params.HintThreshold);
+        } else if (PSI->isFunctionEntryCold(&Callee)) {
+          DEBUG(dbgs() << "Cold callee.\n");
+          Threshold = MinIfValid(Threshold, Params.ColdThreshold);
+        }
       }
     }
   }
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 5728887cc1e9..5652248a60ce 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1752,6 +1752,24 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
       (A == Op0 || B == Op0))
     return Op0;
 
+  // A mask that only clears known zeros of a shifted value is a no-op.
+  Value *X;
+  const APInt *Mask;
+  const APInt *ShAmt;
+  if (match(Op1, m_APInt(Mask))) {
+    // If all bits in the inverted and shifted mask are clear:
+    // and (shl X, ShAmt), Mask --> shl X, ShAmt
+    if (match(Op0, m_Shl(m_Value(X), m_APInt(ShAmt))) &&
+        (~(*Mask)).lshr(*ShAmt).isNullValue())
+      return Op0;
+
+    // If all bits in the inverted and shifted mask are clear:
+    // and (lshr X, ShAmt), Mask --> lshr X, ShAmt
+    if (match(Op0, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
+        (~(*Mask)).shl(*ShAmt).isNullValue())
+      return Op0;
+  }
+
   // A & (-A) = A if A is a power of two or zero.
   if (match(Op0, m_Neg(m_Specific(Op1))) ||
       match(Op1, m_Neg(m_Specific(Op0)))) {
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index 502f4205b689..12b86daa602b 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -75,7 +75,7 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
     return None;
   assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
          "We can only get profile count for call/invoke instruction.");
-  if (computeSummary() && Summary->getKind() == ProfileSummary::PSK_Sample) {
+  if (hasSampleProfile()) {
     // In sample PGO mode, check if there is a profile metadata on the
     // instruction. If it is present, determine hotness solely based on that,
     // since the sampled entry count may not be accurate.
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 800354d2f5b4..a746ddfd7a63 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -629,19 +629,19 @@ static int CompareSCEVComplexity(
     const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
     const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
 
-    // If there is a dominance relationship between the loops, sort by the
-    // dominance. Otherwise, sort by depth. We require such order in getAddExpr.
+    // There is always a dominance between two recs that are used by one SCEV,
+    // so we can safely sort recs by loop header dominance. We require such
+    // order in getAddExpr.
     const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
     if (LLoop != RLoop) {
       const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader();
       assert(LHead != RHead && "Two loops share the same header?");
       if (DT.dominates(LHead, RHead))
         return 1;
-      else if (DT.dominates(RHead, LHead))
-        return -1;
-      unsigned LDepth = LLoop->getLoopDepth(), RDepth = RLoop->getLoopDepth();
-      if (LDepth != RDepth)
-        return (int)LDepth - (int)RDepth;
+      else
+        assert(DT.dominates(RHead, LHead) &&
+               "No dominance between recurrences used by one SCEV?");
+      return -1;
     }
 
     // Addrec complexity grows with operand count.
@@ -2512,22 +2512,23 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
         SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
                                                AddRec->op_end());
         for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
-             ++OtherIdx)
-          if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
-            if (OtherAddRec->getLoop() == AddRecLoop) {
-              for (unsigned i = 0, e = OtherAddRec->getNumOperands();
-                   i != e; ++i) {
-                if (i >= AddRecOps.size()) {
-                  AddRecOps.append(OtherAddRec->op_begin()+i,
-                                   OtherAddRec->op_end());
-                  break;
-                }
-                SmallVector<const SCEV *, 2> TwoOps = {
-                    AddRecOps[i], OtherAddRec->getOperand(i)};
-                AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
+             ++OtherIdx) {
+          const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+          if (OtherAddRec->getLoop() == AddRecLoop) {
+            for (unsigned i = 0, e = OtherAddRec->getNumOperands();
+                 i != e; ++i) {
+              if (i >= AddRecOps.size()) {
+                AddRecOps.append(OtherAddRec->op_begin()+i,
+                                 OtherAddRec->op_end());
+                break;
               }
-              Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+              SmallVector<const SCEV *, 2> TwoOps = {
+                  AddRecOps[i], OtherAddRec->getOperand(i)};
+              AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
             }
+            Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+          }
+        }
         // Step size has changed, so we cannot guarantee no self-wraparound.
         Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
         return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 3a57772cc7f5..43b245c66400 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -128,8 +128,7 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
    }
 
   DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
-  DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
-             r = CriticalPathSet.find_next(r))
+  DEBUG(for (unsigned r : CriticalPathSet.set_bits())
           dbgs() << " " << TRI->getName(r));
   DEBUG(dbgs() << '\n');
 }
@@ -571,7 +570,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
 
       DEBUG({
         dbgs() << " ::";
-        for (int r = BV.find_first(); r != -1; r = BV.find_next(r))
+        for (unsigned r : BV.set_bits())
           dbgs() << " " << TRI->getName(r);
         dbgs() << "\n";
       });
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 98163bffb60b..7d945690e9c3 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -501,7 +501,7 @@ void CodeViewDebug::emitTypeInformation() {
       Error E = Reader.readArray(Types, Reader.getLength());
       if (!E) {
         TypeVisitorCallbacks C;
-        E = CVTypeVisitor(C).visitTypeStream(Types);
+        E = codeview::visitTypeStream(Types, C);
       }
       if (E) {
         logAllUnhandledErrors(std::move(E), errs(), "error: ");
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 22fd7bb46056..20e1467b30c3 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -209,8 +209,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
           } else if (MO.isRegMask()) {
             // If this is a register mask operand, clobber all debug values in
             // non-CSRs.
-            for (int I = ChangingRegs.find_first(); I != -1;
-                 I = ChangingRegs.find_next(I)) {
+            for (unsigned I : ChangingRegs.set_bits()) {
               // Don't consider SP to be clobbered by register masks.
               if (unsigned(I) != SP && TRI->isPhysicalRegister(I) &&
                   MO.clobbersPhysReg(I)) {
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 811858f136eb..77dfb13ac1f2 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1129,6 +1129,11 @@ void IRTranslator::finalizeFunction() {
   ValToVReg.clear();
   FrameIndices.clear();
   MachinePreds.clear();
+  // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
+  // to avoid accessing freed memory (in runOnMachineFunction) and to avoid
+  // destroying it twice (in ~IRTranslator() and ~LLVMContext()).
+  EntryBuilder = MachineIRBuilder();
+  CurBuilder = MachineIRBuilder();
 }
 
 bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index ab433273b189..b53b002f55a6 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -760,7 +760,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
 
   const MachineFrameInfo &MFI = MF->getFrameInfo();
   BitVector PR = MFI.getPristineRegs(*MF);
-  for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+  for (unsigned I : PR.set_bits()) {
     for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true);
          SubRegs.isValid(); ++SubRegs)
       regsLive.insert(*SubRegs);
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 06500289c971..47d726f6da7a 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -285,8 +285,7 @@ class RAGreedy : public MachineFunctionPass,
     // Set B[i] = C for every live bundle where B[i] was NoCand.
     unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
       unsigned Count = 0;
-      for (int i = LiveBundles.find_first(); i >= 0;
-           i = LiveBundles.find_next(i))
+      for (unsigned i : LiveBundles.set_bits())
         if (B[i] == NoCand) {
           B[i] = C;
           Count++;
@@ -1162,9 +1161,8 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
   }
 
   DEBUG({
-    for (int i = Cand.LiveBundles.find_first(); i>=0;
-         i = Cand.LiveBundles.find_next(i))
-    dbgs() << " EB#" << i;
+    for (int i : Cand.LiveBundles.set_bits())
+      dbgs() << " EB#" << i;
     dbgs() << ".\n";
   });
   return true;
@@ -1482,8 +1480,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
     DEBUG({
       dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
                                 << " with bundles";
-      for (int i = Cand.LiveBundles.find_first(); i>=0;
-           i = Cand.LiveBundles.find_next(i))
+      for (int i : Cand.LiveBundles.set_bits())
         dbgs() << " EB#" << i;
       dbgs() << ".\n";
     });
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index caf5cb497a71..0ccee175abfb 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13087,14 +13087,28 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     }
   }
 
-  // If this is a store followed by a store with the same value to the same
-  // location, then the store is dead/noop.
   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
-    if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
-        ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
-        ST1->isUnindexed() && !ST1->isVolatile()) {
-      // The store is dead, remove it.
-      return Chain;
+    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
+        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
+        ST->getMemoryVT() == ST1->getMemoryVT()) {
+      // If this is a store followed by a store with the same value to the same
+      // location, then the store is dead/noop.
+      if (ST1->getValue() == Value) {
+        // The store is dead, remove it.
+        return Chain;
+      }
+
+      // If this store is preceded by another store to the same location
+      // and no other node is chained to that store, we can effectively
+      // drop that store. Do not remove stores to undef as they may be used as
+      // data sinks.
+      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+          !ST1->getBasePtr().isUndef()) {
+        // ST1 is fully overwritten and can be elided. Combine with its chain
+        // value.
+        CombineTo(ST1, ST1->getChain());
+        return SDValue();
+      }
     }
   }
 
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index f10c98ef4e50..43cbf4add0f8 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -310,7 +310,7 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
 
 bool SpillPlacement::scanActiveBundles() {
   RecentPositive.clear();
-  for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
+  for (unsigned n : ActiveNodes->set_bits()) {
     update(n);
     // A node that must spill, or a node without any links is not going to
     // change its value ever again, so exclude it from iterations.
@@ -365,7 +365,7 @@ SpillPlacement::finish() {
 
   // Write preferences back to ActiveNodes.
   bool Perfect = true;
-  for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n))
+  for (unsigned n : ActiveNodes->set_bits())
     if (!nodes[n].preferReg()) {
       ActiveNodes->reset(n);
       Perfect = false;
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index f51d959a089a..86a16187fcb6 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -703,12 +703,10 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
 
     // Create the interval of the blocks that we previously found to be 'alive'.
     BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
-    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
-         pos = MBBLiveness.LiveIn.find_next(pos)) {
+    for (unsigned pos : MBBLiveness.LiveIn.set_bits()) {
       Starts[pos] = Indexes->getMBBStartIdx(&MBB);
     }
-    for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1;
-         pos = MBBLiveness.LiveOut.find_next(pos)) {
+    for (unsigned pos : MBBLiveness.LiveOut.set_bits()) {
       Finishes[pos] = Indexes->getMBBEndIdx(&MBB);
     }
 
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 39aa946fa840..5f63fd4320bb 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -1312,7 +1312,7 @@ TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
 
   // Find the first legal register class with the largest spill size.
   const TargetRegisterClass *BestRC = RC;
-  for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
+  for (unsigned i : SuperRegRC.set_bits()) {
     const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
     // We want the largest possible spill size.
     if (TRI->getSpillSize(*SuperRC) <= TRI->getSpillSize(*BestRC))
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index e6c5d8753b83..9724cb074584 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -564,6 +564,14 @@ void TargetPassConfig::addISelPrepare() {
     addPass(createVerifierPass());
 }
 
+/// -regalloc=... command line option.
+static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+               RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+         cl::init(&useDefaultRegisterAllocator),
+         cl::desc("Register allocator to use"));
+
 /// Add the complete set of target-independent postISel code generator passes.
 ///
 /// This can be read as the standard order of major LLVM CodeGen stages. Stages
@@ -625,8 +633,12 @@ void TargetPassConfig::addMachinePasses() {
   // including phi elimination and scheduling.
   if (getOptimizeRegAlloc())
     addOptimizedRegAlloc(createRegAllocPass(true));
-  else
+  else {
+    if (RegAlloc != &useDefaultRegisterAllocator &&
+        RegAlloc != &createFastRegisterAllocator)
+      report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
     addFastRegAlloc(createRegAllocPass(false));
+  }
 
   // Run post-ra passes.
   addPostRegAlloc();
@@ -759,19 +771,12 @@ MachinePassRegistry RegisterRegAlloc::Registry;
 /// A dummy default pass factory indicates whether the register allocator is
 /// overridden on the command line.
 static llvm::once_flag InitializeDefaultRegisterAllocatorFlag;
-static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
+
 static RegisterRegAlloc
 defaultRegAlloc("default",
                 "pick register allocator based on -O option",
                 useDefaultRegisterAllocator);
 
-/// -regalloc=... command line option.
-static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
-               RegisterPassParser<RegisterRegAlloc> >
-RegAlloc("regalloc",
-         cl::init(&useDefaultRegisterAllocator),
-         cl::desc("Register allocator to use"));
-
 static void initializeDefaultRegisterAllocatorOnce() {
   RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
 
@@ -781,7 +786,6 @@ static void initializeDefaultRegisterAllocatorOnce() {
   }
 }
 
-
 /// Instantiate the default register allocator pass for this target for either
 /// the optimized or unoptimized allocation path. This will be added to the pass
 /// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index f6e4c17d514c..41ec082a24cf 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -50,8 +50,7 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
     ArrayRef<MCPhysReg> Exceptions) const {
   // Check that all super registers of reserved regs are reserved as well.
   BitVector Checked(getNumRegs());
-  for (int Reg = RegisterSet.find_first(); Reg>=0;
-       Reg = RegisterSet.find_next(Reg)) {
+  for (unsigned Reg : RegisterSet.set_bits()) {
     if (Checked[Reg])
       continue;
     for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) {
diff --git a/lib/DebugInfo/CodeView/CVTypeDumper.cpp b/lib/DebugInfo/CodeView/CVTypeDumper.cpp
index bcc8218d9446..02e1682f76e7 100644
--- a/lib/DebugInfo/CodeView/CVTypeDumper.cpp
+++ b/lib/DebugInfo/CodeView/CVTypeDumper.cpp
@@ -11,7 +11,6 @@
 #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/TypeDatabase.h"
 #include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
 #include "llvm/Support/BinaryByteStream.h"
@@ -21,38 +20,23 @@ using namespace llvm::codeview;
 
 Error CVTypeDumper::dump(const CVType &Record, TypeVisitorCallbacks &Dumper) {
   TypeDatabaseVisitor DBV(TypeDB);
-  TypeDeserializer Deserializer;
   TypeVisitorCallbackPipeline Pipeline;
-  Pipeline.addCallbackToPipeline(Deserializer);
   Pipeline.addCallbackToPipeline(DBV);
   Pipeline.addCallbackToPipeline(Dumper);
 
-  CVTypeVisitor Visitor(Pipeline);
-  if (Handler)
-    Visitor.addTypeServerHandler(*Handler);
-
   CVType RecordCopy = Record;
-  if (auto EC = Visitor.visitTypeRecord(RecordCopy))
-    return EC;
-  return Error::success();
+  return codeview::visitTypeRecord(RecordCopy, Pipeline, VDS_BytesPresent,
+                                   Handler);
 }
 
 Error CVTypeDumper::dump(const CVTypeArray &Types,
                          TypeVisitorCallbacks &Dumper) {
   TypeDatabaseVisitor DBV(TypeDB);
-  TypeDeserializer Deserializer;
   TypeVisitorCallbackPipeline Pipeline;
-  Pipeline.addCallbackToPipeline(Deserializer);
   Pipeline.addCallbackToPipeline(DBV);
   Pipeline.addCallbackToPipeline(Dumper);
 
-  CVTypeVisitor Visitor(Pipeline);
-  if (Handler)
-    Visitor.addTypeServerHandler(*Handler);
-
-  if (auto EC = Visitor.visitTypeStream(Types))
-    return EC;
-  return Error::success();
+  return codeview::visitTypeStream(Types, Pipeline, Handler);
 }
 
 Error CVTypeDumper::dump(ArrayRef<uint8_t> Data, TypeVisitorCallbacks &Dumper) {
diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
index b6ed0453d9c4..0f7f5f667790 100644
--- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
@@ -59,13 +59,8 @@ static Expected<TypeServer2Record> deserializeTypeServerRecord(CVType &Record) {
   };
 
   TypeServer2Record R(TypeRecordKind::TypeServer2);
-  TypeDeserializer Deserializer;
   StealTypeServerVisitor Thief(R);
-  TypeVisitorCallbackPipeline Pipeline;
-  Pipeline.addCallbackToPipeline(Deserializer);
-  Pipeline.addCallbackToPipeline(Thief);
-  CVTypeVisitor Visitor(Pipeline);
-  if (auto EC = Visitor.visitTypeRecord(Record))
+  if (auto EC = visitTypeRecord(Record, Thief))
     return std::move(EC);
 
   return R;
@@ -178,7 +173,7 @@ static Error visitMemberRecord(CVMemberRecord &Record,
   return Error::success();
 }
 
-Error CVTypeVisitor::visitMemberRecord(CVMemberRecord &Record) {
+Error CVTypeVisitor::visitMemberRecord(CVMemberRecord Record) {
   return ::visitMemberRecord(Record, Callbacks);
 }
 
@@ -224,3 +219,93 @@ Error CVTypeVisitor::visitFieldListMemberStream(ArrayRef<uint8_t> Data) {
   BinaryStreamReader SR(S);
   return visitFieldListMemberStream(SR);
 }
+
+namespace {
+struct FieldListVisitHelper {
+  FieldListVisitHelper(TypeVisitorCallbacks &Callbacks, ArrayRef<uint8_t> Data,
+                       VisitorDataSource Source)
+      : Stream(Data, llvm::support::little), Reader(Stream),
+        Deserializer(Reader),
+        Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) {
+    if (Source == VDS_BytesPresent) {
+      Pipeline.addCallbackToPipeline(Deserializer);
+      Pipeline.addCallbackToPipeline(Callbacks);
+    }
+  }
+
+  BinaryByteStream Stream;
+  BinaryStreamReader Reader;
+  FieldListDeserializer Deserializer;
+  TypeVisitorCallbackPipeline Pipeline;
+  CVTypeVisitor Visitor;
+};
+
+struct VisitHelper {
+  VisitHelper(TypeVisitorCallbacks &Callbacks, VisitorDataSource Source,
+              TypeServerHandler *TS)
+      : Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) {
+    if (TS)
+      Visitor.addTypeServerHandler(*TS);
+    if (Source == VDS_BytesPresent) {
+      Pipeline.addCallbackToPipeline(Deserializer);
+      Pipeline.addCallbackToPipeline(Callbacks);
+    }
+  }
+
+  TypeDeserializer Deserializer;
+  TypeVisitorCallbackPipeline Pipeline;
+  CVTypeVisitor Visitor;
+};
+}
+
+Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index,
+                                      TypeVisitorCallbacks &Callbacks,
+                                      VisitorDataSource Source,
+                                      TypeServerHandler *TS) {
+  VisitHelper Helper(Callbacks, Source, TS);
+  return Helper.Visitor.visitTypeRecord(Record, Index);
+}
+
+Error llvm::codeview::visitTypeRecord(CVType &Record,
+                                      TypeVisitorCallbacks &Callbacks,
+                                      VisitorDataSource Source,
+                                      TypeServerHandler *TS) {
+  VisitHelper Helper(Callbacks, Source, TS);
+  return Helper.Visitor.visitTypeRecord(Record);
+}
+
+Error llvm::codeview::visitMemberRecordStream(ArrayRef<uint8_t> FieldList,
+                                              TypeVisitorCallbacks &Callbacks) {
+  CVTypeVisitor Visitor(Callbacks);
+  return Visitor.visitFieldListMemberStream(FieldList);
+}
+
+Error llvm::codeview::visitMemberRecord(CVMemberRecord Record,
+                                        TypeVisitorCallbacks &Callbacks,
+                                        VisitorDataSource Source) {
+  FieldListVisitHelper Helper(Callbacks, Record.Data, Source);
+  return Helper.Visitor.visitMemberRecord(Record);
+}
+
+Error llvm::codeview::visitMemberRecord(TypeLeafKind Kind,
+                                        ArrayRef<uint8_t> Record,
+                                        TypeVisitorCallbacks &Callbacks) {
+  CVMemberRecord R;
+  R.Data = Record;
+  R.Kind = Kind;
+  return visitMemberRecord(R, Callbacks, VDS_BytesPresent);
+}
+
+Error llvm::codeview::visitTypeStream(const CVTypeArray &Types,
+                                      TypeVisitorCallbacks &Callbacks,
+                                      TypeServerHandler *TS) {
+  VisitHelper Helper(Callbacks, VDS_BytesPresent, TS);
+  return Helper.Visitor.visitTypeStream(Types);
+}
+
+Error llvm::codeview::visitTypeStream(CVTypeRange Types,
+                                      TypeVisitorCallbacks &Callbacks,
+                                      TypeServerHandler *TS) {
+  VisitHelper Helper(Callbacks, VDS_BytesPresent, TS);
+  return Helper.Visitor.visitTypeStream(Types);
+}
diff --git a/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp b/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp
index 4cb9acbe07d9..704d1131108a 100644
--- a/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp
+++ b/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp
@@ -9,6 +9,7 @@
 
 #include "llvm/DebugInfo/CodeView/RandomAccessTypeVisitor.h"
 
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/TypeDatabase.h"
 #include "llvm/DebugInfo/CodeView/TypeServerHandler.h"
 #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
@@ -20,9 +21,7 @@ RandomAccessTypeVisitor::RandomAccessTypeVisitor(
     const CVTypeArray &Types, uint32_t NumRecords,
     PartialOffsetArray PartialOffsets)
     : Database(NumRecords), Types(Types), DatabaseVisitor(Database),
-      InternalVisitor(Pipeline), PartialOffsets(PartialOffsets) {
-  Pipeline.addCallbackToPipeline(Deserializer);
-  Pipeline.addCallbackToPipeline(DatabaseVisitor);
+      PartialOffsets(PartialOffsets) {
 
   KnownOffsets.resize(Database.capacity());
 }
@@ -38,8 +37,7 @@ Error RandomAccessTypeVisitor::visitTypeIndex(TypeIndex TI,
 
   assert(Database.contains(TI));
   auto &Record = Database.getTypeRecord(TI);
-  CVTypeVisitor V(Callbacks);
-  return V.visitTypeRecord(Record, TI);
+  return codeview::visitTypeRecord(Record, TI, Callbacks);
 }
 
 Error RandomAccessTypeVisitor::visitRangeForType(TypeIndex TI) {
@@ -78,7 +76,7 @@ Error RandomAccessTypeVisitor::visitRange(TypeIndex Begin, uint32_t BeginOffset,
 
   while (Begin != End) {
     assert(!Database.contains(Begin));
-    if (auto EC = InternalVisitor.visitTypeRecord(*RI, Begin))
+    if (auto EC = codeview::visitTypeRecord(*RI, Begin, DatabaseVisitor))
       return EC;
     KnownOffsets[Begin.toArrayIndex()] = BeginOffset;
 
diff --git a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
index 27a6e0987886..9485c9cfedff 100644
--- a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
+++ b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
@@ -216,8 +216,7 @@ Error TypeDumpVisitor::visitMemberEnd(CVMemberRecord &Record) {
 
 Error TypeDumpVisitor::visitKnownRecord(CVType &CVR,
                                         FieldListRecord &FieldList) {
-  CVTypeVisitor Visitor(*this);
-  if (auto EC = Visitor.visitFieldListMemberStream(FieldList.Data))
+  if (auto EC = codeview::visitMemberRecordStream(FieldList.Data, *this))
     return EC;
 
   return Error::success();
diff --git a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index aad20ae6dda1..51f24fa3f135 100644
--- a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -361,8 +361,7 @@ Error TypeStreamMerger::visitKnownRecord(CVType &, FieldListRecord &R) {
   // Visit the members inside the field list.
   HadUntranslatedMember = false;
   FieldListBuilder.begin();
-  CVTypeVisitor Visitor(*this);
-  if (auto EC = Visitor.visitFieldListMemberStream(R.Data))
+  if (auto EC = codeview::visitMemberRecordStream(R.Data, *this))
     return EC;
 
   // Write the record if we translated all field list members.
@@ -440,18 +439,9 @@ Error TypeStreamMerger::visitUnknownType(CVType &Rec) {
 
 Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) {
   assert(IndexMap.empty());
-  TypeVisitorCallbackPipeline Pipeline;
   LastError = Error::success();
 
-  TypeDeserializer Deserializer;
-  Pipeline.addCallbackToPipeline(Deserializer);
-  Pipeline.addCallbackToPipeline(*this);
-
-  CVTypeVisitor Visitor(Pipeline);
-  if (Handler)
-    Visitor.addTypeServerHandler(*Handler);
-
-  if (auto EC = Visitor.visitTypeStream(Types))
+  if (auto EC = codeview::visitTypeStream(Types, *this, Handler))
     return EC;
 
   // If we found bad indices but no other errors, try doing another pass and see
@@ -466,7 +456,8 @@ Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) {
     IsSecondPass = true;
     NumBadIndices = 0;
     CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex);
-    if (auto EC = Visitor.visitTypeStream(Types))
+
+    if (auto EC = codeview::visitTypeStream(Types, *this, Handler))
       return EC;
 
     assert(NumBadIndices <= BadIndicesRemaining &&
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 59a060d143ff..61e75a2b56ab 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1086,49 +1086,32 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
         continue;
     }
 
+    if (Section.relocation_begin() == Section.relocation_end())
+      continue;
+
     std::map<SymbolRef, uint64_t> AddrCache;
-    if (Section.relocation_begin() != Section.relocation_end()) {
-      uint64_t SectionSize = RelocatedSection->getSize();
-      for (const RelocationRef &Reloc : Section.relocations()) {
-        // FIXME: it's not clear how to correctly handle scattered
-        // relocations.
-        if (isRelocScattered(Obj, Reloc))
-          continue;
+    for (const RelocationRef &Reloc : Section.relocations()) {
+      // FIXME: it's not clear how to correctly handle scattered
+      // relocations.
+      if (isRelocScattered(Obj, Reloc))
+        continue;
 
-        Expected<uint64_t> SymAddrOrErr =
-            getSymbolAddress(Obj, Reloc, L, AddrCache);
-        if (!SymAddrOrErr) {
-          errs() << toString(SymAddrOrErr.takeError()) << '\n';
-          continue;
-        }
-
-        object::RelocVisitor V(Obj);
-        object::RelocToApply R(V.visit(Reloc.getType(), Reloc, *SymAddrOrErr));
-        if (V.error()) {
-          SmallString<32> Name;
-          Reloc.getTypeName(Name);
-          errs() << "error: failed to compute relocation: "
-                 << Name << "\n";
-          continue;
-        }
-        uint64_t Address = Reloc.getOffset();
-        if (Address + R.Width > SectionSize) {
-          errs() << "error: " << R.Width << "-byte relocation starting "
-                 << Address << " bytes into section " << name << " which is "
-                 << SectionSize << " bytes long.\n";
-          continue;
-        }
-        if (R.Width > 8) {
-          errs() << "error: can't handle a relocation of more than 8 bytes at "
-                    "a time.\n";
-          continue;
-        }
-        DEBUG(dbgs() << "Writing " << format("%p", R.Value)
-                     << " at " << format("%p", Address)
-                     << " with width " << format("%d", R.Width)
-                     << "\n");
-        Map->insert({Address, {(uint8_t)R.Width, R.Value}});
+      Expected<uint64_t> SymAddrOrErr =
+          getSymbolAddress(Obj, Reloc, L, AddrCache);
+      if (!SymAddrOrErr) {
+        errs() << toString(SymAddrOrErr.takeError()) << '\n';
+        continue;
       }
+
+      object::RelocVisitor V(Obj);
+      object::RelocToApply R(V.visit(Reloc.getType(), Reloc, *SymAddrOrErr));
+      if (V.error()) {
+        SmallString<32> Name;
+        Reloc.getTypeName(Name);
+        errs() << "error: failed to compute relocation: " << Name << "\n";
+        continue;
+      }
+      Map->insert({Reloc.getOffset(), {R.Value}});
     }
   }
 }
diff --git a/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp b/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
index 629f3e80b0ed..cb783cf4fea7 100644
--- a/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
@@ -55,9 +55,8 @@ PDBTypeServerHandler::handleInternal(PDBFile &File,
   auto ExpectedTpi = File.getPDBTpiStream();
   if (!ExpectedTpi)
     return ExpectedTpi.takeError();
-  CVTypeVisitor Visitor(Callbacks);
 
-  if (auto EC = Visitor.visitTypeStream(ExpectedTpi->types(nullptr)))
+  if (auto EC = codeview::visitTypeStream(ExpectedTpi->typeArray(), Callbacks))
     return std::move(EC);
 
   return true;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index e9a4b71c903d..ab86e5d6a0fd 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -705,7 +705,7 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
   unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
   unsigned PaddingSize = 0;
   unsigned StubBufSize = 0;
-  bool IsRequired = isRequiredForExecution(Section) || ProcessAllSections;
+  bool IsRequired = isRequiredForExecution(Section);
   bool IsVirtual = Section.isVirtual();
   bool IsZeroInit = isZeroInit(Section);
   bool IsReadOnly = isReadOnlyData(Section);
@@ -745,8 +745,8 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
     Alignment = std::max(Alignment, getStubAlignment());
 
   // Some sections, such as debug info, don't need to be loaded for execution.
-  // Leave those where they are.
-  if (IsRequired) {
+  // Process those only if explicitly requested.
+  if (IsRequired || ProcessAllSections) {
     Allocate = DataSize + PaddingSize + StubBufSize;
     if (!Allocate)
       Allocate = 1;
@@ -790,6 +790,10 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
   Sections.push_back(
       SectionEntry(Name, Addr, DataSize, Allocate, (uintptr_t)pData));
 
+  // Debug info sections are linked as if their load address was zero
+  if (!IsRequired)
+    Sections.back().setLoadAddress(0);
+
   if (Checker)
     Checker->registerSection(Obj.getFileName(), SectionID);
 
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index 98865f5e065e..bd38dd88201f 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -78,6 +78,9 @@ static bool gCrashRecoveryEnabled = false;
 static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContext>>
        tlIsRecoveringFromCrash;
 
+static void installExceptionOrSignalHandlers();
+static void uninstallExceptionOrSignalHandlers();
+
 CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {}
 
 CrashRecoveryContext::~CrashRecoveryContext() {
@@ -113,6 +116,23 @@ CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
   return CRCI->CRC;
 }
 
+void CrashRecoveryContext::Enable() {
+  sys::ScopedLock L(*gCrashRecoveryContextMutex);
+  // FIXME: Shouldn't this be a refcount or something?
+  if (gCrashRecoveryEnabled)
+    return; // Already enabled: the handlers are installed only once.
+  gCrashRecoveryEnabled = true;
+  installExceptionOrSignalHandlers();
+}
+
+void CrashRecoveryContext::Disable() {
+  sys::ScopedLock L(*gCrashRecoveryContextMutex);
+  if (!gCrashRecoveryEnabled)
+    return; // Not enabled: there is nothing to uninstall.
+  gCrashRecoveryEnabled = false;
+  uninstallExceptionOrSignalHandlers();
+}
+
 void CrashRecoveryContext::registerCleanup(CrashRecoveryContextCleanup *cleanup)
 {
   if (!cleanup)
@@ -140,30 +160,70 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
   delete cleanup;
 }
 
-#ifdef LLVM_ON_WIN32
+#if defined(_MSC_VER)
+// If _MSC_VER is defined, we must have SEH. Use it if it's available. It's way
+// better than VEH. Vectored exception handling catches all exceptions happening
+// on the thread with installed exception handlers, so it can interfere with
+// internal exception handling of other libraries on that thread. SEH works
+// exactly as you would expect normal exception handling to work: it only
+// catches exceptions if they would bubble out from the stack frame with __try /
+// __except.
 
-#include "Windows/WindowsSupport.h"
+static void installExceptionOrSignalHandlers() {}
+static void uninstallExceptionOrSignalHandlers() {}
 
-// On Windows, we can make use of vectored exception handling to
-// catch most crashing situations.  Note that this does mean
-// we will be alerted of exceptions *before* structured exception
-// handling has the opportunity to catch it.  But that isn't likely
-// to cause problems because nowhere in the project is SEH being
-// used.
+bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
+  if (!gCrashRecoveryEnabled) { // Recovery disabled: run Fn unguarded.
+    Fn();
+    return true;
+  }
+
+  bool Result = true;
+  __try {
+    Fn();
+  } __except (1) { // 1 == EXCEPTION_EXECUTE_HANDLER: catch any exception.
+    Result = false;
+  }
+  return Result; // False only when Fn raised a structured exception.
+}
+
+#else // !_MSC_VER
+
+#if defined(LLVM_ON_WIN32)
+// This is a non-MSVC compiler, probably mingw gcc or clang without
+// -fms-extensions. Use vectored exception handling (VEH).
 //
-// Vectored exception handling is built on top of SEH, and so it
-// works on a per-thread basis.
+// On Windows, we can make use of vectored exception handling to catch most
+// crashing situations.  Note that this does mean we will be alerted of
+// exceptions *before* structured exception handling has the opportunity to
+// catch it. Unfortunately, this causes problems in practice with other code
+// running on threads with LLVM crash recovery contexts, so we would like to
+// eventually move away from VEH.
+//
+// VEH works on a per-thread basis, which is an advantage over
+// SetUnhandledExceptionFilter. SetUnhandledExceptionFilter also doesn't have
+// any native support for chaining exception handlers, but VEH allows more than
+// one.
 //
 // The vectored exception handler functionality was added in Windows
 // XP, so if support for older versions of Windows is required,
 // it will have to be added.
-//
-// If we want to support as far back as Win2k, we could use the
-// SetUnhandledExceptionFilter API, but there's a risk of that
-// being entirely overwritten (it's not a chain).
+
+#include "Windows/WindowsSupport.h"
 
 static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
 {
+  // DBG_PRINTEXCEPTION_WIDE_C is not properly defined on all supported
+  // compilers and platforms, so we define it manually.
+  constexpr ULONG DbgPrintExceptionWideC = 0x4001000AL;
+  switch (ExceptionInfo->ExceptionRecord->ExceptionCode)
+  {
+  case DBG_PRINTEXCEPTION_C:
+  case DbgPrintExceptionWideC:
+  case 0x406D1388:  // set debugger thread name
+    return EXCEPTION_CONTINUE_EXECUTION;
+  }
+
   // Lookup the current thread local recovery object.
   const CrashRecoveryContextImpl *CRCI = CurrentContext->get();
 
@@ -192,14 +252,7 @@ static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
 // non-NULL, valid VEH handles, or NULL.
 static sys::ThreadLocal<const void> sCurrentExceptionHandle;
 
-void CrashRecoveryContext::Enable() {
-  sys::ScopedLock L(*gCrashRecoveryContextMutex);
-
-  if (gCrashRecoveryEnabled)
-    return;
-
-  gCrashRecoveryEnabled = true;
-
+static void installExceptionOrSignalHandlers() {
   // We can set up vectored exception handling now.  We will install our
   // handler as the front of the list, though there's no assurances that
   // it will remain at the front (another call could install itself before
@@ -208,14 +261,7 @@ void CrashRecoveryContext::Enable() {
   sCurrentExceptionHandle.set(handle);
 }
 
-void CrashRecoveryContext::Disable() {
-  sys::ScopedLock L(*gCrashRecoveryContextMutex);
-
-  if (!gCrashRecoveryEnabled)
-    return;
-
-  gCrashRecoveryEnabled = false;
-
+static void uninstallExceptionOrSignalHandlers() {
   PVOID currentHandle = const_cast<PVOID>(sCurrentExceptionHandle.get());
   if (currentHandle) {
     // Now we can remove the vectored exception handler from the chain
@@ -226,7 +272,7 @@ void CrashRecoveryContext::Disable() {
   }
 }
 
-#else
+#else // !LLVM_ON_WIN32
 
 // Generic POSIX implementation.
 //
@@ -278,14 +324,7 @@ static void CrashRecoverySignalHandler(int Signal) {
     const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
 }
 
-void CrashRecoveryContext::Enable() {
-  sys::ScopedLock L(*gCrashRecoveryContextMutex);
-
-  if (gCrashRecoveryEnabled)
-    return;
-
-  gCrashRecoveryEnabled = true;
-
+static void installExceptionOrSignalHandlers() {
   // Setup the signal handler.
   struct sigaction Handler;
   Handler.sa_handler = CrashRecoverySignalHandler;
@@ -297,20 +336,13 @@ void CrashRecoveryContext::Enable() {
   }
 }
 
-void CrashRecoveryContext::Disable() {
-  sys::ScopedLock L(*gCrashRecoveryContextMutex);
-
-  if (!gCrashRecoveryEnabled)
-    return;
-
-  gCrashRecoveryEnabled = false;
-
+static void uninstallExceptionOrSignalHandlers() {
   // Restore the previous signal handlers.
   for (unsigned i = 0; i != NumSignals; ++i)
     sigaction(Signals[i], &PrevActions[i], nullptr);
 }
 
-#endif
+#endif // !LLVM_ON_WIN32
 
 bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
   // If crash recovery is disabled, do nothing.
@@ -328,6 +360,8 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
   return true;
 }
 
+#endif // !_MSC_VER
+
 void CrashRecoveryContext::HandleCrash() {
   CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
   assert(CRCI && "Crash recovery context never initialized!");
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index cdea09be41e0..fa28ba1b6ab6 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -103,16 +103,13 @@
 #define STATVFS_F_FLAG(vfs) (vfs).f_flags
 #endif
 
-#if defined(__FreeBSD__) || defined(__NetBSD__)
-#include <sys/sysctl.h>
-#endif
-
 using namespace llvm;
 
 namespace llvm {
 namespace sys  {
 namespace fs {
-#if defined(__Bitrig__) || defined(__OpenBSD__) || defined(__minix) || \
+#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
+    defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) || \
     defined(__linux__) || defined(__CYGWIN__) || defined(__DragonFly__) || \
     defined(_AIX)
 static int
@@ -167,7 +164,7 @@ getprogpath(char ret[PATH_MAX], const char *bin)
   free(pv);
   return nullptr;
 }
-#endif // Bitrig || OpenBSD || minix || linux || CYGWIN || DragonFly || AIX
+#endif // *BSD || Bitrig || minix || linux || CYGWIN || DragonFly || AIX
 
 /// GetMainExecutable - Return the path to the main executable, given the
 /// value of argv[0] from program startup.
@@ -183,24 +180,9 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
     if (realpath(exe_path, link_path))
       return link_path;
   }
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__)
-  int mib[4];
-  mib[0] = CTL_KERN;
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-  mib[1] = KERN_PROC;
-  mib[2] = KERN_PROC_PATHNAME;
-  mib[3] = -1;
-#else
-  mib[1] = KERN_PROC_ARGS;
-  mib[2] = -1;
-  mib[3] = KERN_PROC_PATHNAME;
-#endif
-  char exe_path[PATH_MAX];
-  size_t cb = sizeof(exe_path);
-  if (sysctl(mib, 4, exe_path, &cb, NULL, 0) == 0)
-    return exe_path;
-#elif defined(__Bitrig__) || defined(__OpenBSD__) || defined(__minix) || \
-      defined(__DragonFly__) || defined(_AIX)
+#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
+      defined(__OpenBSD__) || defined(__minix) || defined(__DragonFly__) || \
+      defined(__FreeBSD_kernel__) || defined(_AIX)
   char exe_path[PATH_MAX];
 
   if (getprogpath(exe_path, argv0) != NULL)
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index dc916c034661..1aec602a2a36 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1158,8 +1158,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   }
 
   DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
-        for (int Reg = SavedRegs.find_first(); Reg != -1;
-             Reg = SavedRegs.find_next(Reg))
+        for (unsigned Reg : SavedRegs.set_bits())
           dbgs() << ' ' << PrintReg(Reg, RegInfo);
         dbgs() << "\n";);
 
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4f7c2e122390..1af36086ad90 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -553,7 +553,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::INTRINSIC_VOID);
   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
   setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
-  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
 
   MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
@@ -659,6 +658,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
 
+    // Vector reductions
+    for (MVT VT : MVT::integer_valuetypes()) {
+      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+    }
+    for (MVT VT : MVT::fp_valuetypes()) {
+      setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+    }
+
     setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
     setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
     // Likewise, narrowing and extending vector loads/stores aren't handled
@@ -2606,6 +2618,14 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerMUL(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN:
     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+  case ISD::VECREDUCE_FMAX:
+  case ISD::VECREDUCE_FMIN:
+    return LowerVECREDUCE(Op, DAG);
   }
 }
 
@@ -7128,6 +7148,47 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
   return Cmp;
 }
 
+static SDValue getReductionSDNode(unsigned Op, const SDLoc &DL,
+                                  SDValue ScalarOp, SelectionDAG &DAG) {
+  SDValue VecOp = ScalarOp.getOperand(0); // Vector operand being reduced.
+  SDValue Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
+                     DAG.getConstant(0, DL, MVT::i64)); // Scalar is lane 0.
+}
+
+SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  // Integer reductions return from the switch; FP ones select an intrinsic.
+  SDLoc DL(Op);
+  unsigned Intr;
+  switch (Op.getOpcode()) {
+  case ISD::VECREDUCE_ADD:
+    return getReductionSDNode(AArch64ISD::UADDV, DL, Op, DAG);
+  case ISD::VECREDUCE_SMAX:
+    return getReductionSDNode(AArch64ISD::SMAXV, DL, Op, DAG);
+  case ISD::VECREDUCE_SMIN:
+    return getReductionSDNode(AArch64ISD::SMINV, DL, Op, DAG);
+  case ISD::VECREDUCE_UMAX:
+    return getReductionSDNode(AArch64ISD::UMAXV, DL, Op, DAG);
+  case ISD::VECREDUCE_UMIN:
+    return getReductionSDNode(AArch64ISD::UMINV, DL, Op, DAG);
+  case ISD::VECREDUCE_FMAX:
+    assert(Op->getFlags().hasNoNaNs() &&
+           "fmax vector reduction needs NoNaN flag");
+    Intr = Intrinsic::aarch64_neon_fmaxnmv;
+    break;
+  case ISD::VECREDUCE_FMIN:
+    assert(Op->getFlags().hasNoNaNs() &&
+           "fmin vector reduction needs NoNaN flag");
+    Intr = Intrinsic::aarch64_neon_fminnmv;
+    break;
+  default:
+    llvm_unreachable("Unhandled reduction");
+  }
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
+                     DAG.getConstant(Intr, DL, MVT::i32), Op.getOperand(0));
+}
+
 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
 /// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
 /// specified in the intrinsic calls.
@@ -9490,266 +9551,6 @@ static SDValue performSTORECombine(SDNode *N,
   return SDValue();
 }
 
-/// This function handles the log2-shuffle pattern produced by the
-/// LoopVectorizer for the across vector reduction. It consists of
-/// log2(NumVectorElements) steps and, in each step, 2^(s) elements
-/// are reduced, where s is an induction variable from 0 to
-/// log2(NumVectorElements).
-static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
-                                                     unsigned Op,
-                                                     SelectionDAG &DAG) {
-  EVT VTy = OpV->getOperand(0).getValueType();
-  if (!VTy.isVector())
-    return SDValue();
-
-  int NumVecElts = VTy.getVectorNumElements();
-  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
-    if (NumVecElts != 4)
-      return SDValue();
-  } else {
-    if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
-      return SDValue();
-  }
-
-  int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
-  SDValue PreOp = OpV;
-  // Iterate over each step of the across vector reduction.
-  for (int CurStep = 0; CurStep != NumExpectedSteps; ++CurStep) {
-    SDValue CurOp = PreOp.getOperand(0);
-    SDValue Shuffle = PreOp.getOperand(1);
-    if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) {
-      // Try to swap the 1st and 2nd operand as add and min/max instructions
-      // are commutative.
-      CurOp = PreOp.getOperand(1);
-      Shuffle = PreOp.getOperand(0);
-      if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
-        return SDValue();
-    }
-
-    // Check if the input vector is fed by the operator we want to handle,
-    // except the last step; the very first input vector is not necessarily
-    // the same operator we are handling.
-    if (CurOp.getOpcode() != Op && (CurStep != (NumExpectedSteps - 1)))
-      return SDValue();
-
-    // Check if it forms one step of the across vector reduction.
-    // E.g.,
-    //   %cur = add %1, %0
-    //   %shuffle = vector_shuffle %cur, <2, 3, u, u>
-    //   %pre = add %cur, %shuffle
-    if (Shuffle.getOperand(0) != CurOp)
-      return SDValue();
-
-    int NumMaskElts = 1 << CurStep;
-    ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Shuffle)->getMask();
-    // Check mask values in each step.
-    // We expect the shuffle mask in each step follows a specific pattern
-    // denoted here by the <M, U> form, where M is a sequence of integers
-    // starting from NumMaskElts, increasing by 1, and the number integers
-    // in M should be NumMaskElts. U is a sequence of UNDEFs and the number
-    // of undef in U should be NumVecElts - NumMaskElts.
-    // E.g., for <8 x i16>, mask values in each step should be :
-    //   step 0 : <1,u,u,u,u,u,u,u>
-    //   step 1 : <2,3,u,u,u,u,u,u>
-    //   step 2 : <4,5,6,7,u,u,u,u>
-    for (int i = 0; i < NumVecElts; ++i)
-      if ((i < NumMaskElts && Mask[i] != (NumMaskElts + i)) ||
-          (i >= NumMaskElts && !(Mask[i] < 0)))
-        return SDValue();
-
-    PreOp = CurOp;
-  }
-  unsigned Opcode;
-  bool IsIntrinsic = false;
-
-  switch (Op) {
-  default:
-    llvm_unreachable("Unexpected operator for across vector reduction");
-  case ISD::ADD:
-    Opcode = AArch64ISD::UADDV;
-    break;
-  case ISD::SMAX:
-    Opcode = AArch64ISD::SMAXV;
-    break;
-  case ISD::UMAX:
-    Opcode = AArch64ISD::UMAXV;
-    break;
-  case ISD::SMIN:
-    Opcode = AArch64ISD::SMINV;
-    break;
-  case ISD::UMIN:
-    Opcode = AArch64ISD::UMINV;
-    break;
-  case ISD::FMAXNUM:
-    Opcode = Intrinsic::aarch64_neon_fmaxnmv;
-    IsIntrinsic = true;
-    break;
-  case ISD::FMINNUM:
-    Opcode = Intrinsic::aarch64_neon_fminnmv;
-    IsIntrinsic = true;
-    break;
-  }
-  SDLoc DL(N);
-
-  return IsIntrinsic
-             ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
-                           DAG.getConstant(Opcode, DL, MVT::i32), PreOp)
-             : DAG.getNode(
-                   ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
-                   DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
-                   DAG.getConstant(0, DL, MVT::i64));
-}
-
-/// Target-specific DAG combine for the across vector min/max reductions.
-/// This function specifically handles the final clean-up step of the vector
-/// min/max reductions produced by the LoopVectorizer. It is the log2-shuffle
-/// pattern, which narrows down and finds the final min/max value from all
-/// elements of the vector.
-/// For example, for a <16 x i8> vector :
-///   svn0 = vector_shuffle %0, undef<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u>
-///   %smax0 = smax %arr, svn0
-///   %svn1 = vector_shuffle %smax0, undef<4,5,6,7,u,u,u,u,u,u,u,u,u,u,u,u>
-///   %smax1 = smax %smax0, %svn1
-///   %svn2 = vector_shuffle %smax1, undef<2,3,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
-///   %smax2 = smax %smax1, svn2
-///   %svn3 = vector_shuffle %smax2, undef<1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
-///   %sc = setcc %smax2, %svn3, gt
-///   %n0 = extract_vector_elt %sc, #0
-///   %n1 = extract_vector_elt %smax2, #0
-///   %n2 = extract_vector_elt $smax2, #1
-///   %result = select %n0, %n1, n2
-///     becomes :
-///   %1 = smaxv %0
-///   %result = extract_vector_elt %1, 0
-static SDValue
-performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
-                                        const AArch64Subtarget *Subtarget) {
-  if (!Subtarget->hasNEON())
-    return SDValue();
-
-  SDValue N0 = N->getOperand(0);
-  SDValue IfTrue = N->getOperand(1);
-  SDValue IfFalse = N->getOperand(2);
-
-  // Check if the SELECT merges up the final result of the min/max
-  // from a vector.
-  if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-      IfTrue.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-      IfFalse.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
-    return SDValue();
-
-  // Expect N0 is fed by SETCC.
-  SDValue SetCC = N0.getOperand(0);
-  EVT SetCCVT = SetCC.getValueType();
-  if (SetCC.getOpcode() != ISD::SETCC || !SetCCVT.isVector() ||
-      SetCCVT.getVectorElementType() != MVT::i1)
-    return SDValue();
-
-  SDValue VectorOp = SetCC.getOperand(0);
-  unsigned Op = VectorOp->getOpcode();
-  // Check if the input vector is fed by the operator we want to handle.
-  if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN &&
-      Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM)
-    return SDValue();
-
-  EVT VTy = VectorOp.getValueType();
-  if (!VTy.isVector())
-    return SDValue();
-
-  if (VTy.getSizeInBits() < 64)
-    return SDValue();
-
-  EVT EltTy = VTy.getVectorElementType();
-  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
-    if (EltTy != MVT::f32)
-      return SDValue();
-  } else {
-    if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
-      return SDValue();
-  }
-
-  // Check if extracting from the same vector.
-  // For example,
-  //   %sc = setcc %vector, %svn1, gt
-  //   %n0 = extract_vector_elt %sc, #0
-  //   %n1 = extract_vector_elt %vector, #0
-  //   %n2 = extract_vector_elt $vector, #1
-  if (!(VectorOp == IfTrue->getOperand(0) &&
-        VectorOp == IfFalse->getOperand(0)))
-    return SDValue();
-
-  // Check if the condition code is matched with the operator type.
-  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
-  if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) ||
-      (Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) ||
-      (Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) ||
-      (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) ||
-      (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE &&
-       CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT &&
-       CC != ISD::SETGE) ||
-      (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE &&
-       CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT &&
-       CC != ISD::SETLE))
-    return SDValue();
-
-  // Expect to check only lane 0 from the vector SETCC.
-  if (!isNullConstant(N0.getOperand(1)))
-    return SDValue();
-
-  // Expect to extract the true value from lane 0.
-  if (!isNullConstant(IfTrue.getOperand(1)))
-    return SDValue();
-
-  // Expect to extract the false value from lane 1.
-  if (!isOneConstant(IfFalse.getOperand(1)))
-    return SDValue();
-
-  return tryMatchAcrossLaneShuffleForReduction(N, SetCC, Op, DAG);
-}
-
-/// Target-specific DAG combine for the across vector add reduction.
-/// This function specifically handles the final clean-up step of the vector
-/// add reduction produced by the LoopVectorizer. It is the log2-shuffle
-/// pattern, which adds all elements of a vector together.
-/// For example, for a <4 x i32> vector :
-///   %1 = vector_shuffle %0, <2,3,u,u>
-///   %2 = add %0, %1
-///   %3 = vector_shuffle %2, <1,u,u,u>
-///   %4 = add %2, %3
-///   %result = extract_vector_elt %4, 0
-/// becomes :
-///   %0 = uaddv %0
-///   %result = extract_vector_elt %0, 0
-static SDValue
-performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG,
-                                     const AArch64Subtarget *Subtarget) {
-  if (!Subtarget->hasNEON())
-    return SDValue();
-  SDValue N0 = N->getOperand(0);
-  SDValue N1 = N->getOperand(1);
-
-  // Check if the input vector is fed by the ADD.
-  if (N0->getOpcode() != ISD::ADD)
-    return SDValue();
-
-  // The vector extract idx must constant zero because we only expect the final
-  // result of the reduction is placed in lane 0.
-  if (!isNullConstant(N1))
-    return SDValue();
-
-  EVT VTy = N0.getValueType();
-  if (!VTy.isVector())
-    return SDValue();
-
-  EVT EltTy = VTy.getVectorElementType();
-  if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
-    return SDValue();
-
-  if (VTy.getSizeInBits() < 64)
-    return SDValue();
-
-  return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG);
-}
 
 /// Target-specific DAG combine function for NEON load/store intrinsics
 /// to merge base address updates.
@@ -10428,12 +10229,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performBitcastCombine(N, DCI, DAG);
   case ISD::CONCAT_VECTORS:
     return performConcatVectorsCombine(N, DCI, DAG);
-  case ISD::SELECT: {
-    SDValue RV = performSelectCombine(N, DCI);
-    if (!RV.getNode())
-      RV = performAcrossLaneMinMaxReductionCombine(N, DAG, Subtarget);
-    return RV;
-  }
+  case ISD::SELECT:
+    return performSelectCombine(N, DCI);
   case ISD::VSELECT:
     return performVSelectCombine(N, DCI.DAG);
   case ISD::LOAD:
@@ -10455,8 +10252,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performNVCASTCombine(N);
   case ISD::INSERT_VECTOR_ELT:
     return performPostLD1Combine(N, DCI, true);
-  case ISD::EXTRACT_VECTOR_ELT:
-    return performAcrossLaneAddReductionCombine(N, DAG, Subtarget);
   case ISD::INTRINSIC_VOID:
   case ISD::INTRINSIC_W_CHAIN:
     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -10676,6 +10471,14 @@ void AArch64TargetLowering::ReplaceNodeResults(
   case ISD::BITCAST:
     ReplaceBITCASTResults(N, Results, DAG);
     return;
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+    Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
+    return;
+
   case AArch64ISD::SADDV:
     ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
     return;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 89db566c219c..ecc2517fb288 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -568,6 +568,7 @@ class AArch64TargetLowering : public TargetLowering {
   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                         std::vector<SDNode *> *Created) const override;
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7c6f55c06bce..43569af04347 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -769,3 +769,28 @@ unsigned AArch64TTIImpl::getMinPrefetchStride() {
 unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
   return ST->getMaxPrefetchIterationsAhead();
 }
+
+bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
+                                           TTI::ReductionFlags Flags) const {
+  assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
+  const unsigned EltBits = Ty->getScalarSizeInBits();
+  auto AtLeast128 = [&] { return EltBits * Ty->getVectorNumElements() >= 128; };
+  switch (Opcode) {
+  case Instruction::Add: // Needs a total vector width of at least 128 bits.
+    return AtLeast128();
+  case Instruction::ICmp: // Same width rule, but elements under 64 bits.
+    return EltBits < 64 && AtLeast128();
+  case Instruction::FCmp: // Allowed only when the NoNaN flag is set.
+    return Flags.NoNaN;
+  case Instruction::FAdd: // The remaining known opcodes are always declined.
+  case Instruction::FMul:
+  case Instruction::Mul:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    return false;
+  default:
+    llvm_unreachable("Unhandled reduction opcode");
+  }
+  return false;
+}
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 280d97f3c502..d0299149c38c 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -145,6 +145,9 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
   bool shouldExpandReduction(const IntrinsicInst *II) const {
     return false;
   }
+
+  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
+                             TTI::ReductionFlags Flags) const;
   /// @}
 };
 
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 7c99752b881f..c3ac796a0a44 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1707,10 +1707,38 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
 
   // FIXME: Look for on separate components
   if (Src.getOpcode() == ISD::FNEG) {
-    Mods |= (SISrcMods::NEG | SISrcMods::NEG_HI);
+    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
     Src = Src.getOperand(0);
   }
 
+  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
+    unsigned VecMods = Mods;
+
+    SDValue Lo = Src.getOperand(0);
+    SDValue Hi = Src.getOperand(1);
+
+    if (Lo.getOpcode() == ISD::FNEG) {
+      Lo = Lo.getOperand(0);
+      Mods ^= SISrcMods::NEG;
+    }
+
+    if (Hi.getOpcode() == ISD::FNEG) {
+      Hi = Hi.getOperand(0);
+      Mods ^= SISrcMods::NEG_HI;
+    }
+
+    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
+      // Really a scalar input. Just select from the low half of the register to
+      // avoid packing.
+
+      Src = Lo;
+      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+      return true;
+    }
+
+    Mods = VecMods;
+  }
+
   // Packed instructions do not have abs modifiers.
 
   // FIXME: Handle abs/neg of individual components.
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index bed7d326b3dd..e543cae07ada 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -289,6 +289,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
     return getGeneration() >= GFX9;
   }
 
+  bool hasMin3Max3_16() const { // True from generation GFX9 onwards.
+    return GFX9 <= getGeneration();
+  }
+
   bool hasCARRY() const {
     return (getGeneration() >= EVERGREEN);
   }
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 48a14e4dbea2..286be355bc14 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4491,7 +4491,8 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
 
 
   if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY &&
-      VT != MVT::f64) {
+      VT != MVT::f64 &&
+      ((VT != MVT::f16 && VT != MVT::i16) || Subtarget->hasMin3Max3_16())) {
     // max(max(a, b), c) -> max3(a, b, c)
     // min(min(a, b), c) -> min3(a, b, c)
     if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 065fd09eb356..38a16b525a75 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -765,7 +765,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       .addFrameIndex(FrameIndex)               // addr
       .addMemOperand(MMO)
       .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
-      .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit);
+      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
     // Add the scratch resource registers as implicit uses because we may end up
     // needing them, and need to ensure that the reserved registers are
     // correctly handled.
@@ -796,7 +796,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     .addReg(SrcReg, getKillRegState(isKill)) // data
     .addFrameIndex(FrameIndex)               // addr
     .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
-    .addReg(MFI->getScratchWaveOffsetReg())  // scratch_offset
+    .addReg(MFI->getFrameOffsetReg())        // scratch_offset
     .addImm(0)                               // offset
     .addMemOperand(MMO);
 }
@@ -869,7 +869,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
       .addFrameIndex(FrameIndex) // addr
       .addMemOperand(MMO)
       .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
-      .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit);
+      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
 
     if (ST.hasScalarStores()) {
       // m0 is used for offset to scalar stores if used to spill.
@@ -892,10 +892,10 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
 
   unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
   BuildMI(MBB, MI, DL, get(Opcode), DestReg)
-    .addFrameIndex(FrameIndex)              // vaddr
-    .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
-    .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
-    .addImm(0)                              // offset
+    .addFrameIndex(FrameIndex)        // vaddr
+    .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+    .addReg(MFI->getFrameOffsetReg()) // scratch_offset
+    .addImm(0)                        // offset
     .addMemOperand(MMO);
 }
 
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 8820e294562b..06cfc95be96a 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -654,11 +654,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
       if (Offset != 0) {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(MFI->getScratchWaveOffsetReg())
+          .addReg(MFI->getFrameOffsetReg())
           .addImm(Offset);
       } else {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(MFI->getScratchWaveOffsetReg());
+          .addReg(MFI->getFrameOffsetReg());
       }
 
       BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
@@ -715,11 +715,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                    EltSize, MinAlign(Align, EltSize * i));
       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
-        .addReg(TmpReg, RegState::Kill)         // src
-        .addFrameIndex(Index)                   // vaddr
-        .addReg(MFI->getScratchRSrcReg())       // srrsrc
-        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
-        .addImm(i * 4)                          // offset
+        .addReg(TmpReg, RegState::Kill)    // src
+        .addFrameIndex(Index)              // vaddr
+        .addReg(MFI->getScratchRSrcReg())  // srrsrc
+        .addReg(MFI->getFrameOffsetReg())  // soffset
+        .addImm(i * 4)                     // offset
         .addMemOperand(MMO);
     }
   }
@@ -806,11 +806,11 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
       if (Offset != 0) {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(MFI->getScratchWaveOffsetReg())
+          .addReg(MFI->getFrameOffsetReg())
           .addImm(Offset);
       } else {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(MFI->getScratchWaveOffsetReg());
+          .addReg(MFI->getFrameOffsetReg());
       }
 
       auto MIB =
@@ -853,10 +853,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
         MinAlign(Align, EltSize * i));
 
       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
-        .addFrameIndex(Index)                   // vaddr
-        .addReg(MFI->getScratchRSrcReg())       // srsrc
-        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
-        .addImm(i * 4)                          // offset
+        .addFrameIndex(Index)              // vaddr
+        .addReg(MFI->getScratchRSrcReg())  // srsrc
+        .addReg(MFI->getFrameOffsetReg())  // soffset
+        .addImm(i * 4)                     // offset
         .addMemOperand(MMO);
 
       auto MIB =
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index ffa6c60d6b1f..c0b5069948fb 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -300,10 +300,19 @@ def V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
 def V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
 
 def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+
 def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmed3>;
 def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmed3>;
 def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumed3>;
-}
+
+def V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmin3>;
+def V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmin3>;
+def V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumin3>;
+
+def V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmax3>;
+def V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmax3>;
+def V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumax3>;
+} // End SubtargetPredicate = isGFX9
 
 
 //===----------------------------------------------------------------------===//
@@ -509,6 +518,15 @@ defm V_OR3_B32 : VOP3_Real_vi <0x202>;
 defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>;
 
 defm V_XAD_U32 : VOP3_Real_vi <0x1f3>;
+
+defm V_MIN3_F16 : VOP3_Real_vi <0x1f4>;
+defm V_MIN3_I16 : VOP3_Real_vi <0x1f5>;
+defm V_MIN3_U16 : VOP3_Real_vi <0x1f6>;
+
+defm V_MAX3_F16 : VOP3_Real_vi <0x1f7>;
+defm V_MAX3_I16 : VOP3_Real_vi <0x1f8>;
+defm V_MAX3_U16 : VOP3_Real_vi <0x1f9>;
+
 defm V_MED3_F16 : VOP3_Real_vi <0x1fa>;
 defm V_MED3_I16 : VOP3_Real_vi <0x1fb>;
 defm V_MED3_U16 : VOP3_Real_vi <0x1fc>;
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 8c680cdf9b47..b1f059835ff5 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -345,25 +345,10 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
     I.setDesc(TII.get(COPY));
     return selectCopy(I, TII, MRI, TRI, RBI);
   }
-  case G_ADD:
   case G_GEP:
     I.setDesc(TII.get(ARM::ADDrr));
     MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
     break;
-  case G_SUB:
-    I.setDesc(TII.get(ARM::SUBrr));
-    MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
-    break;
-  case G_MUL:
-    if (TII.getSubtarget().hasV6Ops()) {
-      I.setDesc(TII.get(ARM::MUL));
-    } else {
-      assert(TII.getSubtarget().useMulOps() && "Unsupported target");
-      I.setDesc(TII.get(ARM::MULv5));
-      MIB->getOperand(0).setIsEarlyClobber(true);
-    }
-    MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
-    break;
   case G_FRAME_INDEX:
     // Add 0 to the given frame index and hope it will eventually be folded into
     // the user(s).
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index d0fd366ab9ed..1a17d4e33e4f 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -571,8 +571,7 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
   GPRsNoLRSP.reset(ARM::LR);
   GPRsNoLRSP.reset(ARM::SP);
   GPRsNoLRSP.reset(ARM::PC);
-  for (int Register = GPRsNoLRSP.find_first(); Register != -1;
-       Register = GPRsNoLRSP.find_next(Register)) {
+  for (unsigned Register : GPRsNoLRSP.set_bits()) {
     if (!UsedRegs.contains(Register)) {
       // Remember the first pop-friendly register and exit.
       if (PopFriendly.test(Register)) {
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index ae58c26e145a..1597057ad63f 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -386,7 +386,7 @@ void RegDefsUses::setCallerSaved(const MachineInstr &MI) {
 void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) {
   BitVector AllocSet = TRI.getAllocatableSet(MF);
 
-  for (int R = AllocSet.find_first(); R != -1; R = AllocSet.find_next(R))
+  for (unsigned R : AllocSet.set_bits())
     for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI)
       AllocSet.set(*AI);
 
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 40bfe3a449f7..57a1d373c88c 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1765,31 +1765,36 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
   // Check whether the frame pointer register is allocated. If so, make sure it
   // is spilled to the correct offset.
   if (needsFP(MF)) {
-    HasGPSaveArea = true;
-
     int FI = PFI->getFramePointerSaveIndex();
     assert(FI && "No Frame Pointer Save Slot!");
-
     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+    // FP is R31/X31, so no need to update MinGPR/MinG8R.
+    HasGPSaveArea = true;
   }
 
   if (PFI->usesPICBase()) {
-    HasGPSaveArea = true;
-
     int FI = PFI->getPICBasePointerSaveIndex();
     assert(FI && "No PIC Base Pointer Save Slot!");
-
     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+
+    MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
+    HasGPSaveArea = true;
   }
 
   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   if (RegInfo->hasBasePointer(MF)) {
-    HasGPSaveArea = true;
-
     int FI = PFI->getBasePointerSaveIndex();
     assert(FI && "No Base Pointer Save Slot!");
-
     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+
+    unsigned BP = RegInfo->getBaseRegister(MF);
+    if (PPC::G8RCRegClass.contains(BP)) {
+      MinG8R = std::min<unsigned>(MinG8R, BP);
+      HasG8SaveArea = true;
+    } else if (PPC::GPRCRegClass.contains(BP)) {
+      MinGPR = std::min<unsigned>(MinGPR, BP);
+      HasGPSaveArea = true;
+    }
   }
 
   // General register save area starts right below the Floating-point
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 17bdd595da10..144aea850833 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -410,6 +410,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   // To handle counter-based loop conditions.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
 
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+
   // Comparisons that require checking two conditions.
   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
@@ -8184,6 +8189,26 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   return Flags;
 }
 
+SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  // Operand 0 is either the intrinsic ID itself or an extra chain that
+  // SelectionDAGBuilder::visitTargetIntrinsic placed ahead of it.
+  const unsigned IDIdx = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
+  const auto IID = cast<ConstantSDNode>(Op.getOperand(IDIdx))->getZExtValue();
+
+  // Only ppc_cfence is handled here; every other intrinsic yields an empty
+  // SDValue.
+  if (IID != Intrinsic::ppc_cfence)
+    return SDValue();
+
+  assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
+  SDLoc DL(Op);
+  // Any-extend the intrinsic's argument to i64 and feed it to CFENCE8.
+  SDValue Val =
+      DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(IDIdx + 1));
+  return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other, Val), 0);
+}
+
 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                   SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -8649,6 +8674,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   // Frame & Return address.
   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
+
+  case ISD::INTRINSIC_VOID:
+    return LowerINTRINSIC_VOID(Op, DAG);
   }
 }
 
@@ -8753,12 +8781,19 @@ Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
-  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord))
+  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
+    // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
+    // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+    // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+    if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
+      return Builder.CreateCall(
+          Intrinsic::getDeclaration(
+              Builder.GetInsertBlock()->getParent()->getParent(),
+              Intrinsic::ppc_cfence, {Inst->getType()}),
+          {Inst});
+    // FIXME: Can use isync for rmw operation.
     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
-  // FIXME: this is too conservative, a dependent branch + isync is enough.
-  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
-  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
-  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+  }
   return nullptr;
 }
 
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 4fc744257262..acb77943b118 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -905,6 +905,7 @@ namespace llvm {
     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index a8433919f0f3..a3f894c81a01 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -983,6 +983,10 @@ def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
                         [(set i64:$rD,
                           (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
                  isPPC64;
+
+let isBarrier = 1, isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in
+def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
+
 def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
           (ADD8TLS $in, tglobaltlsaddr:$g)>;
 def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 790a8902b3d2..3afcec1248d5 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1873,6 +1873,8 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
 }
 
 bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+  auto &MBB = *MI.getParent();
+  auto DL = MI.getDebugLoc();
   switch (MI.getOpcode()) {
   case TargetOpcode::LOAD_STACK_GUARD: {
     assert(Subtarget.isTargetLinux() &&
@@ -1920,6 +1922,17 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     MI.setDesc(get(Opcode));
     return true;
   }
+  case PPC::CFENCE8: {
+    auto Val = MI.getOperand(0).getReg();
+    BuildMI(MBB, MI, DL, get(PPC::CMPW), PPC::CR7).addReg(Val).addReg(Val);
+    BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
+        .addImm(PPC::PRED_NE_MINUS)
+        .addReg(PPC::CR7)
+        .addImm(1);
+    MI.setDesc(get(PPC::ISYNC));
+    MI.RemoveOperand(0);
+    return true;
+  }
   }
   return false;
 }
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 1af5e7f28342..0766cfe4a987 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1223,9 +1223,15 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
   // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
   // a two-value operand where a dag node expects two operands. :(
   let isCodeGenOnly = 1 in {
-    def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
-                    "b${cond:cc}${cond:pm} ${cond:reg}, $dst"
-                    /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+    class BCC_class : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
+                            "b${cond:cc}${cond:pm} ${cond:reg}, $dst"
+                            /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+    def BCC : BCC_class;
+
+    // The same as BCC, except that it's not a terminator. Used for introducing
+    // control flow dependency without creating new blocks.
+    let isTerminator = 0 in def CTRL_DEP : BCC_class;
+
     def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst),
                      "b${cond:cc}a${cond:pm} ${cond:reg}, $dst">;
 
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index f56b238f91e6..6a3dc6799c43 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -325,6 +325,30 @@ int SystemZTTIImpl::getArithmeticInstrCost(
 
   unsigned ScalarBits = Ty->getScalarSizeInBits();
 
+  // Div with a constant which is a power of 2 will be converted by
+  // DAGCombiner to use shifts. With vector shift-element instructions, a
+  // vector sdiv costs about as much as a scalar one.
+  const unsigned SDivCostEstimate = 4;
+  bool SDivPow2 = false;
+  bool UDivPow2 = false;
+  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) &&
+      Args.size() == 2) {
+    const ConstantInt *CI = nullptr;
+    if (const Constant *C = dyn_cast<Constant>(Args[1])) {
+      if (C->getType()->isVectorTy())
+        CI = dyn_cast_or_null<const ConstantInt>(C->getSplatValue());
+      else
+        CI = dyn_cast<const ConstantInt>(C);
+    }
+    if (CI != nullptr &&
+        (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) {
+      if (Opcode == Instruction::SDiv)
+        SDivPow2 = true;
+      else
+        UDivPow2 = true;
+    }
+  }
+
   if (Ty->isVectorTy()) {
     assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type.");
     unsigned VF = Ty->getVectorNumElements();
@@ -333,10 +357,13 @@ int SystemZTTIImpl::getArithmeticInstrCost(
     // These vector operations are custom handled, but are still supported
     // with one instruction per vector, regardless of element size.
     if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
-        Opcode == Instruction::AShr) {
+        Opcode == Instruction::AShr || UDivPow2) {
       return NumVectors;
     }
 
+    if (SDivPow2)
+      return (NumVectors * SDivCostEstimate);
+
     // These FP operations are supported with a single vector instruction for
     // double (base implementation assumes float generally costs 2). For
     // FP128, the scalar cost is 1, and there is no overhead since the values
@@ -395,6 +422,11 @@ int SystemZTTIImpl::getArithmeticInstrCost(
       // 2 * ipm sequences ; xor ; shift ; compare
       return 7;
 
+    if (UDivPow2)
+      return 1;
+    if (SDivPow2)
+      return SDivCostEstimate;
+
     // An extra extension for narrow types is needed.
     if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem))
       // sext of op(s) for narrow types
diff --git a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index 5fd4a8d1949e..ba39b6cdb568 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -140,8 +140,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
 
     // Check if it's possible to reuse any of the used colors.
     if (!MRI->isLiveIn(Old))
-      for (int C(UsedColors.find_first()); C != -1;
-           C = UsedColors.find_next(C)) {
+      for (unsigned C : UsedColors.set_bits()) {
         if (MRI->getRegClass(SortedIntervals[C]->reg) != RC)
           continue;
         for (LiveInterval *OtherLI : Assignments[C])
diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt
index 8e8e5fd1eff1..54619589c341 100644
--- a/lib/Target/WebAssembly/known_gcc_test_failures.txt
+++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -33,9 +33,6 @@ built-in-setjmp.c
 pr60003.c
 
 # Error in the program / unsupported by Clang.
-scal-to-vec1.c
-scal-to-vec2.c
-scal-to-vec3.c
 20000822-1.c
 20010209-1.c
 20010605-1.c
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 3a421fe77392..784c3a6557ff 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -235,8 +235,6 @@ def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
                                    "LEA instruction needs inputs at AG stage">;
 def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                                    "LEA instruction with certain arguments is slow">;
-def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
-                                   "LEA instruction with 3 ops or certain registers is slow">;
 def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                                    "INC and DEC instructions are slower than ADD and SUB">;
 def FeatureSoftFloat
@@ -482,7 +480,6 @@ def SNBFeatures : ProcessorFeatures<[], [
   FeatureXSAVE,
   FeatureXSAVEOPT,
   FeatureLAHFSAHF,
-  FeatureSlow3OpsLEA,
   FeatureFastScalarFSQRT,
   FeatureFastSHLDRotate
 ]>;
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 9f649dad8bc0..2cd4c1a3e7b3 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -27,26 +27,20 @@
 #include "llvm/Target/TargetInstrInfo.h"
 using namespace llvm;
 
-namespace llvm {
-void initializeFixupLEAPassPass(PassRegistry &);
-}
-
-#define FIXUPLEA_DESC "X86 LEA Fixup"
-#define FIXUPLEA_NAME "x86-fixup-LEAs"
-
-#define DEBUG_TYPE FIXUPLEA_NAME
+#define DEBUG_TYPE "x86-fixup-LEAs"
 
 STATISTIC(NumLEAs, "Number of LEA instructions created");
 
 namespace {
 class FixupLEAPass : public MachineFunctionPass {
   enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
-
+  static char ID;
   /// \brief Loop over all of the instructions in the basic block
   /// replacing applicable instructions with LEA instructions,
   /// where appropriate.
   bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
 
+  StringRef getPassName() const override { return "X86 LEA Fixup"; }
 
   /// \brief Given a machine register, look for the instruction
   /// which writes it in the current basic block. If found,
@@ -68,22 +62,6 @@ class FixupLEAPass : public MachineFunctionPass {
   void processInstructionForSLM(MachineBasicBlock::iterator &I,
                                 MachineFunction::iterator MFI);
 
-
-  /// \brief Given a LEA instruction which is unprofitable
-  /// on SNB+ try to replace it with other instructions.
-  /// According to Intel's Optimization Reference Manual:
-  /// " For LEA instructions with three source operands and some specific
-  ///   situations, instruction latency has increased to 3 cycles, and must
-  ///   dispatch via port 1:
-  /// - LEA that has all three source operands: base, index, and offset
-  /// - LEA that uses base and index registers where the base is EBP, RBP,
-  ///   or R13
-  /// - LEA that uses RIP relative addressing mode
-  /// - LEA that uses 16-bit addressing mode "
-  /// This function currently handles the first 2 cases only.
-  MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI,
-                                          MachineFunction::iterator MFI);
-
   /// \brief Look for LEAs that add 1 to reg or subtract 1 from reg
   /// and convert them to INC or DEC respectively.
   bool fixupIncDec(MachineBasicBlock::iterator &I,
@@ -107,13 +85,7 @@ class FixupLEAPass : public MachineFunctionPass {
                                    MachineBasicBlock::iterator &MBBI) const;
 
 public:
-  static char ID;
-
-  StringRef getPassName() const override { return FIXUPLEA_DESC; }
-
-  FixupLEAPass() : MachineFunctionPass(ID) {
-    initializeFixupLEAPassPass(*PassRegistry::getPassRegistry());
-  }
+  FixupLEAPass() : MachineFunctionPass(ID) {}
 
   /// \brief Loop over all of the basic blocks,
   /// replacing instructions by equivalent LEA instructions
@@ -132,11 +104,8 @@ class FixupLEAPass : public MachineFunctionPass {
   bool OptIncDec;
   bool OptLEA;
 };
-}
-
 char FixupLEAPass::ID = 0;
-
-INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
+}
 
 MachineInstr *
 FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
@@ -199,7 +168,7 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
   MF = &Func;
   const X86Subtarget &ST = Func.getSubtarget<X86Subtarget>();
   OptIncDec = !ST.slowIncDec() || Func.getFunction()->optForMinSize();
-  OptLEA = ST.LEAusesAG() || ST.slowLEA() || ST.slow3OpsLEA();
+  OptLEA = ST.LEAusesAG() || ST.slowLEA();
 
   if (!OptLEA && !OptIncDec)
     return false;
@@ -273,64 +242,9 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
   return MachineBasicBlock::iterator();
 }
 
-static inline bool isLEA(const int Opcode) {
-  return Opcode == X86::LEA16r || Opcode == X86::LEA32r ||
-         Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
-}
-
-static inline bool isInefficientLEAReg(unsigned int Reg) {
-  return Reg == X86::EBP || Reg == X86::RBP || Reg == X86::R13;
-}
-
-static inline bool isRegOperand(const MachineOperand &Op) {
-  return Op.isReg() && Op.getReg() != X86::NoRegister;
-}
-/// hasIneffecientLEARegs - LEA that uses base and index registers
-/// where the base is EBP, RBP, or R13
-static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
-                                            const MachineOperand &Index) {
-  return Base.isReg() && isInefficientLEAReg(Base.getReg()) &&
-         isRegOperand(Index);
-}
-
-static inline bool hasLEAOffset(const MachineOperand &Offset) {
-  return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
-}
-
-// LEA instruction that has all three operands: offset, base and index
-static inline bool isThreeOperandsLEA(const MachineOperand &Base,
-                                      const MachineOperand &Index,
-                                      const MachineOperand &Offset) {
-  return isRegOperand(Base) && isRegOperand(Index) && hasLEAOffset(Offset);
-}
-
-static inline int getADDrrFromLEA(int LEAOpcode) {
-  switch (LEAOpcode) {
-  default:
-    llvm_unreachable("Unexpected LEA instruction");
-  case X86::LEA16r:
-    return X86::ADD16rr;
-  case X86::LEA32r:
-    return X86::ADD32rr;
-  case X86::LEA64_32r:
-  case X86::LEA64r:
-    return X86::ADD64rr;
-  }
-}
-
-static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) {
-  bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
-  switch (LEAOpcode) {
-  default:
-    llvm_unreachable("Unexpected LEA instruction");
-  case X86::LEA16r:
-    return IsInt8 ? X86::ADD16ri8 : X86::ADD16ri;
-  case X86::LEA32r:
-  case X86::LEA64_32r:
-    return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
-  case X86::LEA64r:
-    return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
-  }
+static inline bool isLEA(const int opcode) {
+  return opcode == X86::LEA16r || opcode == X86::LEA32r ||
+         opcode == X86::LEA64r || opcode == X86::LEA64_32r;
 }
 
 /// isLEASimpleIncOrDec - Does this LEA have one these forms:
@@ -423,8 +337,8 @@ void FixupLEAPass::seekLEAFixup(MachineOperand &p,
 void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
                                             MachineFunction::iterator MFI) {
   MachineInstr &MI = *I;
-  const int Opcode = MI.getOpcode();
-  if (!isLEA(Opcode))
+  const int opcode = MI.getOpcode();
+  if (!isLEA(opcode))
     return;
   if (MI.getOperand(5).getReg() != 0 || !MI.getOperand(4).isImm() ||
       !TII->isSafeToClobberEFLAGS(*MFI, I))
@@ -436,144 +350,55 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
     return;
   if (MI.getOperand(2).getImm() > 1)
     return;
+  int addrr_opcode, addri_opcode;
+  switch (opcode) {
+  default:
+    llvm_unreachable("Unexpected LEA instruction");
+  case X86::LEA16r:
+    addrr_opcode = X86::ADD16rr;
+    addri_opcode = X86::ADD16ri;
+    break;
+  case X86::LEA32r:
+    addrr_opcode = X86::ADD32rr;
+    addri_opcode = X86::ADD32ri;
+    break;
+  case X86::LEA64_32r:
+  case X86::LEA64r:
+    addrr_opcode = X86::ADD64rr;
+    addri_opcode = X86::ADD64ri32;
+    break;
+  }
   DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
   DEBUG(dbgs() << "FixLEA: Replaced by: ";);
   MachineInstr *NewMI = nullptr;
+  const MachineOperand &Dst = MI.getOperand(0);
   // Make ADD instruction for two registers writing to LEA's destination
   if (SrcR1 != 0 && SrcR2 != 0) {
-    const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
-    const MachineOperand &Src = MI.getOperand(SrcR1 == DstR ? 3 : 1);
-    NewMI =
-        BuildMI(*MFI, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
+    const MachineOperand &Src1 = MI.getOperand(SrcR1 == DstR ? 1 : 3);
+    const MachineOperand &Src2 = MI.getOperand(SrcR1 == DstR ? 3 : 1);
+    NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addrr_opcode))
+                .add(Dst)
+                .add(Src1)
+                .add(Src2);
+    MFI->insert(I, NewMI);
     DEBUG(NewMI->dump(););
   }
   // Make ADD instruction for immediate
   if (MI.getOperand(4).getImm() != 0) {
-    const MCInstrDesc &ADDri =
-        TII->get(getADDriFromLEA(Opcode, MI.getOperand(4)));
     const MachineOperand &SrcR = MI.getOperand(SrcR1 == DstR ? 1 : 3);
-    NewMI = BuildMI(*MFI, I, MI.getDebugLoc(), ADDri, DstR)
+    NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addri_opcode))
+                .add(Dst)
                 .add(SrcR)
                 .addImm(MI.getOperand(4).getImm());
+    MFI->insert(I, NewMI);
     DEBUG(NewMI->dump(););
   }
   if (NewMI) {
     MFI->erase(I);
-    I = NewMI;
+    I = static_cast<MachineBasicBlock::iterator>(NewMI);
   }
 }
 
-MachineInstr *
-FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
-                                        MachineFunction::iterator MFI) {
-
-  const int LEAOpcode = MI.getOpcode();
-  if (!isLEA(LEAOpcode))
-    return nullptr;
-
-  const MachineOperand &Dst = MI.getOperand(0);
-  const MachineOperand &Base = MI.getOperand(1);
-  const MachineOperand &Scale = MI.getOperand(2);
-  const MachineOperand &Index = MI.getOperand(3);
-  const MachineOperand &Offset = MI.getOperand(4);
-  const MachineOperand &Segment = MI.getOperand(5);
-
-  if (!(isThreeOperandsLEA(Base, Index, Offset) ||
-        hasInefficientLEABaseReg(Base, Index)) ||
-      !TII->isSafeToClobberEFLAGS(*MFI, MI) ||
-      Segment.getReg() != X86::NoRegister)
-    return nullptr;
-
-  unsigned int DstR = Dst.getReg();
-  unsigned int BaseR = Base.getReg();
-  unsigned int IndexR = Index.getReg();
-  unsigned SSDstR =
-      (LEAOpcode == X86::LEA64_32r) ? getX86SubSuperRegister(DstR, 64) : DstR;
-  bool IsScale1 = Scale.getImm() == 1;
-  bool IsInefficientBase = isInefficientLEAReg(BaseR);
-  bool IsInefficientIndex = isInefficientLEAReg(IndexR);
-
-  // Skip these cases since it takes more than 2 instructions
-  // to replace the LEA instruction.
-  if (IsInefficientBase && SSDstR == BaseR && !IsScale1)
-    return nullptr;
-  if (LEAOpcode == X86::LEA64_32r && IsInefficientBase &&
-      (IsInefficientIndex || !IsScale1))
-    return nullptr;
-
-  const DebugLoc DL = MI.getDebugLoc();
-  const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode));
-  const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset));
-
-  DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
-  DEBUG(dbgs() << "FixLEA: Replaced by: ";);
-
-  // First try to replace LEA with one or two (for the 3-op LEA case)
-  // add instructions:
-  // 1.lea (%base,%index,1), %base => add %index,%base
-  // 2.lea (%base,%index,1), %index => add %base,%index
-  if (IsScale1 && (DstR == BaseR || DstR == IndexR)) {
-    const MachineOperand &Src = DstR == BaseR ? Index : Base;
-    MachineInstr *NewMI =
-        BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Src);
-    DEBUG(NewMI->dump(););
-    // Create ADD instruction for the Offset in case of 3-Ops LEA.
-    if (hasLEAOffset(Offset)) {
-      NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
-      DEBUG(NewMI->dump(););
-    }
-    return NewMI;
-  }
-  // If the base is inefficient try switching the index and base operands,
-  // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
-  // lea offset(%base,%index,scale),%dst =>
-  // lea (%base,%index,scale); add offset,%dst
-  if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
-    MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode))
-                              .add(Dst)
-                              .add(IsInefficientBase ? Index : Base)
-                              .add(Scale)
-                              .add(IsInefficientBase ? Base : Index)
-                              .addImm(0)
-                              .add(Segment);
-    DEBUG(NewMI->dump(););
-    // Create ADD instruction for the Offset in case of 3-Ops LEA.
-    if (hasLEAOffset(Offset)) {
-      NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
-      DEBUG(NewMI->dump(););
-    }
-    return NewMI;
-  }
-  // Handle the rest of the cases with inefficient base register:
-  assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!");
-  assert(IsInefficientBase && "efficient base should be handled already!");
-
-  // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
-  if (IsScale1 && !hasLEAOffset(Offset)) {
-    TII->copyPhysReg(*MFI, MI, DL, DstR, BaseR, Base.isKill());
-    DEBUG(MI.getPrevNode()->dump(););
-
-    MachineInstr *NewMI =
-        BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Index);
-    DEBUG(NewMI->dump(););
-    return NewMI;
-  }
-  // lea offset(%base,%index,scale), %dst =>
-  // lea offset( ,%index,scale), %dst; add %base,%dst
-  MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode))
-                            .add(Dst)
-                            .addReg(0)
-                            .add(Scale)
-                            .add(Index)
-                            .add(Offset)
-                            .add(Segment);
-  DEBUG(NewMI->dump(););
-
-  NewMI = BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Base);
-  DEBUG(NewMI->dump(););
-  return NewMI;
-}
-
 bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
                                      MachineFunction::iterator MFI) {
 
@@ -585,16 +410,8 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
     if (OptLEA) {
       if (MF.getSubtarget<X86Subtarget>().isSLM())
         processInstructionForSLM(I, MFI);
-
-      else {
-        if (MF.getSubtarget<X86Subtarget>().slow3OpsLEA()) {
-          if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) {
-            MFI->erase(I);
-            I = NewMI;
-          }
-        } else
-          processInstruction(I, MFI);
-      }
+      else
+        processInstruction(I, MFI);
     }
   }
   return false;
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index de58d719acb4..5eb5ad52840a 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -19,6 +19,7 @@
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -72,6 +73,9 @@ class X86InstructionSelector : public InstructionSelector {
   bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
 
+  bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
+                   MachineFunction &MF) const;
+
   const X86TargetMachine &TM;
   const X86Subtarget &STI;
   const X86InstrInfo &TII;
@@ -243,6 +247,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
     return true;
   if (selectCmp(I, MRI, MF))
     return true;
+  if (selectUadde(I, MRI, MF))
+    return true;
 
   return false;
 }
@@ -564,6 +570,66 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I,
   return true;
 }
 
+bool X86InstructionSelector::selectUadde(MachineInstr &I,
+                                         MachineRegisterInfo &MRI,
+                                         MachineFunction &MF) const {
+  if (I.getOpcode() != TargetOpcode::G_UADDE)
+    return false;
+
+  const unsigned DstReg = I.getOperand(0).getReg();
+  const unsigned CarryOutReg = I.getOperand(1).getReg();
+  const unsigned Op0Reg = I.getOperand(2).getReg();
+  const unsigned Op1Reg = I.getOperand(3).getReg();
+  unsigned CarryInReg = I.getOperand(4).getReg();
+
+  const LLT DstTy = MRI.getType(DstReg);
+
+  if (DstTy != LLT::scalar(32))
+    return false; // Only 32-bit scalar results are handled.
+
+  // Look through any G_TRUNCs to find the instruction defining the carry-in.
+  MachineInstr *Def = MRI.getVRegDef(CarryInReg);
+  while (Def->getOpcode() == TargetOpcode::G_TRUNC) {
+    CarryInReg = Def->getOperand(1).getReg();
+    Def = MRI.getVRegDef(CarryInReg);
+  }
+
+  unsigned Opcode;
+  if (Def->getOpcode() == TargetOpcode::G_UADDE) {
+    // Carry-in comes from a previous G_UADDE: copy it into EFLAGS for ADC.
+
+    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), X86::EFLAGS)
+        .addReg(CarryInReg);
+
+    if (!RBI.constrainGenericRegister(CarryInReg, X86::GR32RegClass, MRI))
+      return false;
+
+    Opcode = X86::ADC32rr;
+  } else if (auto val = getConstantVRegVal(CarryInReg, MRI)) {
+    // Carry-in is a constant; only 0 is supported (plain ADD, no carry).
+    if (*val != 0)
+      return false;
+
+    Opcode = X86::ADD32rr;
+  } else
+    return false; // Non-constant carry-in from another source: unsupported.
+
+  MachineInstr &AddInst =
+      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
+           .addReg(Op0Reg)
+           .addReg(Op1Reg);
+
+  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg)
+      .addReg(X86::EFLAGS);
+
+  if (!constrainSelectedInstRegOperands(AddInst, TII, TRI, RBI) ||
+      !RBI.constrainGenericRegister(CarryOutReg, X86::GR32RegClass, MRI))
+    return false;
+
+  I.eraseFromParent();
+  return true;
+}
+
 InstructionSelector *
 llvm::createX86InstructionSelector(const X86TargetMachine &TM,
                                    X86Subtarget &Subtarget,
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index cf26238c0239..8ce240714f17 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -59,6 +59,11 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
     for (auto Ty : {s8, s16, s32})
       setAction({BinOp, Ty}, Legal);
 
+  for (unsigned Op : {G_UADDE}) {
+    setAction({Op, s32}, Legal);
+    setAction({Op, 1, s1}, Legal);
+  }
+
   for (unsigned MemOp : {G_LOAD, G_STORE}) {
     for (auto Ty : {s8, s16, s32, p0})
       setAction({MemOp, Ty}, Legal);
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 02be95e2e556..de1514243aeb 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -253,11 +253,6 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   /// True if the LEA instruction with certain arguments is slow
   bool SlowLEA;
 
-  /// True if the LEA instruction has all three source operands: base, index,
-  /// and offset or if the LEA instruction uses base and index registers where
-  /// the base is EBP, RBP,or R13
-  bool Slow3OpsLEA;
-
   /// True if INC and DEC instructions are slow when writing to flags
   bool SlowIncDec;
 
@@ -495,7 +490,6 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   bool callRegIndirect() const { return CallRegIndirect; }
   bool LEAusesAG() const { return LEAUsesAG; }
   bool slowLEA() const { return SlowLEA; }
-  bool slow3OpsLEA() const { return Slow3OpsLEA; }
   bool slowIncDec() const { return SlowIncDec; }
   bool hasCDI() const { return HasCDI; }
   bool hasPFI() const { return HasPFI; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index c6a90725d89c..9a82e6e50463 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -61,7 +61,6 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
 namespace llvm {
 
 void initializeWinEHStatePassPass(PassRegistry &);
-void initializeFixupLEAPassPass(PassRegistry &);
 void initializeX86ExecutionDepsFixPass(PassRegistry &);
 
 } // end namespace llvm
@@ -76,7 +75,6 @@ extern "C" void LLVMInitializeX86Target() {
   initializeWinEHStatePassPass(PR);
   initializeFixupBWInstPassPass(PR);
   initializeEvexToVexInstPassPass(PR);
-  initializeFixupLEAPassPass(PR);
   initializeX86ExecutionDepsFixPass(PR);
 }
 
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 80e18161a94b..8566bd91c89e 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1392,6 +1392,16 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
   // CTLZ: llvm\test\CodeGen\X86\vector-lzcnt-*.ll
   // CTPOP: llvm\test\CodeGen\X86\vector-popcnt-*.ll
   // CTTZ: llvm\test\CodeGen\X86\vector-tzcnt-*.ll
+  static const CostTblEntry AVX512BWCostTbl[] = {
+    { ISD::BITREVERSE, MVT::v8i64,   5 },
+    { ISD::BITREVERSE, MVT::v16i32,  5 },
+    { ISD::BITREVERSE, MVT::v32i16,  5 },
+    { ISD::BITREVERSE, MVT::v64i8,   5 },
+  };
+  static const CostTblEntry AVX512CostTbl[] = {
+    { ISD::BITREVERSE, MVT::v8i64,  36 },
+    { ISD::BITREVERSE, MVT::v16i32, 24 },
+  };
   static const CostTblEntry XOPCostTbl[] = {
     { ISD::BITREVERSE, MVT::v4i64,   4 },
     { ISD::BITREVERSE, MVT::v8i32,   4 },
@@ -1550,6 +1560,14 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
   MVT MTy = LT.second;
 
   // Attempt to lookup cost.
+  if (ST->hasBWI())
+    if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
+      return LT.first * Entry->Cost;
+
+  if (ST->hasAVX512())
+    if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
+      return LT.first * Entry->Cost;
+
   if (ST->hasXOP())
     if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
       return LT.first * Entry->Cost;
diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp
index 4480220f2cd4..417d57f7625b 100644
--- a/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -347,6 +347,27 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
   return FrameTy;
 }
 
+// We need to make room to insert a spill after initial PHIs, but before
+// catchswitch instruction. Placing it in the same block violates the rule that
+// catchswitch, like all other EHPads, must be the first non-PHI in a block.
+//
+// Split away catchswitch into a separate block and insert in its place:
+//
+//   cleanuppad <InsertPt> cleanupret.
+//
+// The cleanupret instruction then serves as the insertion point for the spill.
+static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
+  BasicBlock *CurrentBlock = CatchSwitch->getParent();
+  BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
+  CurrentBlock->getTerminator()->eraseFromParent();
+
+  auto *CleanupPad =
+      CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
+  auto *CleanupRet =
+      CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
+  return CleanupRet;
+}
+
 // Replace all alloca and SSA values that are accessed across suspend points
 // with GetElementPointer from coroutine frame + loads and stores. Create an
 // AllocaSpillBB that will become the new entry block for the resume parts of
@@ -437,8 +458,11 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
           InsertPt = NewBB->getTerminator();
         } else if (dyn_cast<PHINode>(CurrentValue)) {
           // Skip the PHINodes and EH pads instructions.
-          InsertPt =
-              &*cast<Instruction>(E.def())->getParent()->getFirstInsertionPt();
+          BasicBlock *DefBlock = cast<Instruction>(E.def())->getParent();
+          if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
+            InsertPt = splitBeforeCatchSwitch(CSI);
+          else
+            InsertPt = &*DefBlock->getFirstInsertionPt();
         } else {
           // For all other values, the spill is placed immediately after
           // the definition.
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 1424f61fe701..f88a2c6acc3f 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -74,6 +74,27 @@ static inline unsigned getComplexity(Value *V) {
   return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
 }
 
+/// Return false for the predicates listed below, which are not canonical, and
+/// true for all others. Canonicalization reduces the number of patterns other
+/// transforms must match: e.g. we may swap the operands of a conditional branch
+/// or select so that its compare uses the inverted (canonical) predicate.
+static inline bool isCanonicalPredicate(CmpInst::Predicate Pred) {
+  switch (Pred) {
+  case CmpInst::ICMP_NE:
+  case CmpInst::ICMP_ULE:
+  case CmpInst::ICMP_SLE:
+  case CmpInst::ICMP_UGE:
+  case CmpInst::ICMP_SGE:
+  // TODO: There are 16 FCMP predicates. Should others be (not) canonical?
+  case CmpInst::FCMP_ONE:
+  case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_OGE:
+    return false;
+  default:
+    return true;
+  }
+}
+
 /// \brief Add one to a Constant
 static inline Constant *AddOne(Constant *C) {
   return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 65b1148cb03b..7ed9fd566b37 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2210,37 +2210,17 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
     return &BI;
   }
 
-  // Canonicalize fcmp_one -> fcmp_oeq
-  FCmpInst::Predicate FPred; Value *Y;
-  if (match(&BI, m_Br(m_OneUse(m_FCmp(FPred, m_Value(X), m_Value(Y))),
-                      TrueDest, FalseDest))) {
-    // TODO: Why are we only transforming these 3 predicates?
-    if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
-        FPred == FCmpInst::FCMP_OGE) {
-      FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
-      Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
-
-      // Swap Destinations and condition.
-      BI.swapSuccessors();
-      Worklist.Add(Cond);
-      return &BI;
-    }
-  }
-
-  // Canonicalize icmp_ne -> icmp_eq
-  ICmpInst::Predicate IPred;
-  if (match(&BI, m_Br(m_OneUse(m_ICmp(IPred, m_Value(X), m_Value(Y))),
-                      TrueDest, FalseDest))) {
-    if (IPred == ICmpInst::ICMP_NE  || IPred == ICmpInst::ICMP_ULE ||
-        IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
-        IPred == ICmpInst::ICMP_SGE) {
-      ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
-      Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
-      // Swap Destinations and condition.
-      BI.swapSuccessors();
-      Worklist.Add(Cond);
-      return &BI;
-    }
+  // Canonicalize, for example, icmp_ne -> icmp_eq or fcmp_one -> fcmp_oeq.
+  CmpInst::Predicate Pred;
+  if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), TrueDest,
+                      FalseDest)) &&
+      !isCanonicalPredicate(Pred)) {
+    // Swap destinations and condition.
+    CmpInst *Cond = cast<CmpInst>(BI.getCondition());
+    Cond->setPredicate(CmpInst::getInversePredicate(Pred));
+    BI.swapSuccessors();
+    Worklist.Add(Cond);
+    return &BI;
   }
 
   return nullptr;
@@ -3053,7 +3033,10 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
         }
       }
 
-      InstrsForInstCombineWorklist.push_back(Inst);
+      // Skip debug intrinsics: processing these calls in InstCombine consumes
+      // a non-trivial amount of time and provides no value for optimization.
+      if (!isa<DbgInfoIntrinsic>(Inst))
+        InstrsForInstCombineWorklist.push_back(Inst);
     }
 
     // Recursively visit successors.  If this is a branch or switch on a
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 340c81fed0fd..37b9c4b1094e 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -546,7 +546,7 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
     // If there are escaping uses of invariant.start instruction, the load maybe
     // non-invariant.
     if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
-        II->hasNUsesOrMore(1))
+        !II->use_empty())
       continue;
     unsigned InvariantSizeInBits =
         cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 6693a26e8890..cb6223b070a6 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1292,13 +1292,15 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
   BasicBlock *PH = CurLoop->getLoopPreheader();
   Value *InitX = PhiX->getIncomingValueForBlock(PH);
   // If we check X != 0 before entering the loop we don't need a zero
-  // check in CTLZ intrinsic.
-  if (BasicBlock *PreCondBB = PH->getSinglePredecessor())
-    if (BranchInst *PreCondBr =
-        dyn_cast<BranchInst>(PreCondBB->getTerminator())) {
-      if (matchCondition(PreCondBr, PH) == InitX)
-        ZeroCheck = true;
-    }
+  // check in CTLZ intrinsic, but only if Cnt Phi is not used outside of the
+  // loop (if it is used we count CTLZ(X >> 1)).
+  if (!IsCntPhiUsedOutsideLoop)
+    if (BasicBlock *PreCondBB = PH->getSinglePredecessor())
+      if (BranchInst *PreCondBr =
+          dyn_cast<BranchInst>(PreCondBB->getTerminator())) {
+        if (matchCondition(PreCondBr, PH) == InitX)
+          ZeroCheck = true;
+      }
 
   // Check if CTLZ intrinsic is profitable. Assume it is always profitable
   // if we delete the loop (the loop has only 6 instructions):
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index ccedb98d7fa1..bd1f21c69eba 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3902,8 +3902,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
 
         // Compute the difference between the two.
         int64_t Imm = (uint64_t)JImm - M->first;
-        for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
-             LUIdx = UsedByIndices.find_next(LUIdx))
+        for (unsigned LUIdx : UsedByIndices.set_bits())
           // Make a memo of this use, offset, and register tuple.
           if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
             WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 5e0a705782ea..0e7572f8d2e5 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -642,6 +642,7 @@ class NewGVN {
   void updateProcessedCount(Value *V);
   void verifyMemoryCongruency() const;
   void verifyIterationSettled(Function &F);
+  void verifyStoreExpressions() const;
   bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const;
   BasicBlock *getBlockForValue(Value *V) const;
   void deleteExpression(const Expression *E) const;
@@ -2003,7 +2004,6 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
 
   // If it's not a memory use, set the MemoryAccess equivalence
   auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I));
-  bool InstWasMemoryLeader = InstMA && OldClass->getMemoryLeader() == InstMA;
   if (InstMA)
     moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass);
   ValueToClass[I] = NewClass;
@@ -2029,31 +2029,6 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
       if (OldClass->getStoredValue())
         OldClass->setStoredValue(nullptr);
     }
-    // If we destroy the old access leader and it's a store, we have to
-    // effectively destroy the congruence class.  When it comes to scalars,
-    // anything with the same value is as good as any other.  That means that
-    // one leader is as good as another, and as long as you have some leader for
-    // the value, you are good.. When it comes to *memory states*, only one
-    // particular thing really represents the definition of a given memory
-    // state.  Once it goes away, we need to re-evaluate which pieces of memory
-    // are really still equivalent. The best way to do this is to re-value
-    // number things.  The only way to really make that happen is to destroy the
-    // rest of the class.  In order to effectively destroy the class, we reset
-    // ExpressionToClass for each by using the ValueToExpression mapping.  The
-    // members later get marked as touched due to the leader change.  We will
-    // create new congruence classes, and the pieces that are still equivalent
-    // will end back together in a new class.  If this becomes too expensive, it
-    // is possible to use a versioning scheme for the congruence classes to
-    // avoid the expressions finding this old class.  Note that the situation is
-    // different for memory phis, becuase they are evaluated anew each time, and
-    // they become equal not by hashing, but by seeing if all operands are the
-    // same (or only one is reachable).
-    if (OldClass->getStoreCount() > 0 && InstWasMemoryLeader) {
-      DEBUG(dbgs() << "Kicking everything out of class " << OldClass->getID()
-                   << " because MemoryAccess leader changed");
-      for (auto Member : *OldClass)
-        ExpressionToClass.erase(ValueToExpression.lookup(Member));
-    }
     OldClass->setLeader(getNextValueLeader(OldClass));
     OldClass->resetNextLeader();
     markValueLeaderChangeTouched(OldClass);
@@ -2062,7 +2037,6 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
 
 // Perform congruence finding on a given value numbering expression.
 void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
-  ValueToExpression[I] = E;
   // This is guaranteed to return something, since it will at least find
   // TOP.
 
@@ -2132,6 +2106,18 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
     if (auto *CI = dyn_cast<CmpInst>(I))
       markPredicateUsersTouched(CI);
   }
+  // If we changed the class of the store, we want to ensure nothing finds the
+  // old store expression.  In particular, loads do not compare against stored
+  // value, so they will find old store expressions (and associated class
+  // mappings) if we leave them in the table.
+  if (ClassChanged && isa<StoreExpression>(E)) {
+    auto *OldE = ValueToExpression.lookup(I);
+    // It could just be that the old class died. We don't want to erase it if we
+    // just moved classes.
+    if (OldE && isa<StoreExpression>(OldE) && !OldE->equals(*E))
+      ExpressionToClass.erase(OldE);
+  }
+  ValueToExpression[I] = E;
 }
 
 // Process the fact that Edge (from, to) is reachable, including marking
@@ -2651,6 +2637,30 @@ void NewGVN::verifyIterationSettled(Function &F) {
 #endif
 }
 
+// Verify that for each store expression in the expression to class mapping,
+// only the latest appears, and multiple ones do not appear.
+// Because loads do not use the stored value when doing equality with stores,
+// if we don't erase the old store expressions from the table, a load can find
+// a no-longer valid StoreExpression.
+void NewGVN::verifyStoreExpressions() const {
+#ifndef NDEBUG
+  DenseSet<std::pair<const Value *, const Value *>> StoreExpressionSet;
+  for (const auto &KV : ExpressionToClass) {
+    if (auto *SE = dyn_cast<StoreExpression>(KV.first)) {
+      // Make sure a version that will conflict with loads is not already there
+      auto Res =
+          StoreExpressionSet.insert({SE->getOperand(0), SE->getMemoryLeader()});
+      assert(Res.second &&
+             "Stored expression conflict exists in expression table");
+      auto *ValueExpr = ValueToExpression.lookup(SE->getStoreInst());
+      assert(ValueExpr && ValueExpr->equals(*SE) &&
+             "StoreExpression in ExpressionToClass is not latest "
+             "StoreExpression for value");
+    }
+  }
+#endif
+}
+
 // This is the main value numbering loop, it iterates over the initial touched
 // instruction set, propagating value numbers, marking things touched, etc,
 // until the set of touched instructions is completely empty.
@@ -2668,8 +2678,7 @@ void NewGVN::iterateTouchedInstructions() {
     // TODO: As we hit a new block, we should push and pop equalities into a
     // table lookupOperandLeader can use, to catch things PredicateInfo
     // might miss, like edge-only equivalences.
-    for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1;
-         InstrNum = TouchedInstructions.find_next(InstrNum)) {
+    for (unsigned InstrNum : TouchedInstructions.set_bits()) {
 
       // This instruction was found to be dead. We don't bother looking
       // at it again.
@@ -2776,6 +2785,7 @@ bool NewGVN::runGVN() {
   iterateTouchedInstructions();
   verifyMemoryCongruency();
   verifyIterationSettled(F);
+  verifyStoreExpressions();
 
   Changed |= eliminateInstructions(F);
 
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index ef29d4141600..53320bff0883 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1922,7 +1922,7 @@ Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) {
 
   // User must be a binary operator with one or more uses.
   Instruction *User = I->user_back();
-  if (!isa<BinaryOperator>(User) || !User->hasNUsesOrMore(1))
+  if (!isa<BinaryOperator>(User) || User->use_empty())
     return nullptr;
 
   unsigned UserOpcode = User->getOpcode();
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 4f608c97147d..b32a61a7e8f8 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -1,4 +1,4 @@
-//===-- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow --------===//
+//===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,25 +7,41 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GenericDomTree.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
 
 #define DEBUG_TYPE "simple-loop-unswitch"
 
@@ -174,7 +190,7 @@ static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
     // When the loop exit is directly unswitched we just need to update the
     // incoming basic block. We loop to handle weird cases with repeated
     // incoming blocks, but expect to typically only have one operand here.
-    for (auto i : llvm::seq<int>(0, PN->getNumOperands())) {
+    for (auto i : seq<int>(0, PN->getNumOperands())) {
       assert(PN->getIncomingBlock(i) == &OldExitingBB &&
              "Found incoming block different from unique predecessor!");
       PN->setIncomingBlock(i, &OldPH);
@@ -688,9 +704,11 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
 }
 
 namespace {
+
 class SimpleLoopUnswitchLegacyPass : public LoopPass {
 public:
   static char ID; // Pass ID, replacement for typeid
+
   explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) {
     initializeSimpleLoopUnswitchLegacyPassPass(
         *PassRegistry::getPassRegistry());
@@ -703,7 +721,8 @@ class SimpleLoopUnswitchLegacyPass : public LoopPass {
     getLoopAnalysisUsage(AU);
   }
 };
-} // namespace
+
+} // end anonymous namespace
 
 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
   if (skipLoop(L))
diff --git a/test/Analysis/CostModel/SystemZ/div-pow2.ll b/test/Analysis/CostModel/SystemZ/div-pow2.ll
new file mode 100644
index 000000000000..9ef2dd71e8fa
--- /dev/null
+++ b/test/Analysis/CostModel/SystemZ/div-pow2.ll
@@ -0,0 +1,154 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+
+; Scalar sdiv
+
+define i64 @fun0(i64 %a) {
+  %r = sdiv i64 %a, 2
+  ret i64 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i64 %a, 2
+}
+
+define i64 @fun1(i64 %a) {
+  %r = sdiv i64 %a, -4
+  ret i64 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i64 %a, -4
+}
+
+define i32 @fun2(i32 %a) {
+  %r = sdiv i32 %a, 8
+  ret i32 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i32 %a, 8
+}
+
+define i32 @fun3(i32 %a) {
+  %r = sdiv i32 %a, -16
+  ret i32 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i32 %a, -16
+}
+
+define i16 @fun4(i16 %a) {
+  %r = sdiv i16 %a, 32
+  ret i16 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i16 %a, 32
+}
+
+define i16 @fun5(i16 %a) {
+  %r = sdiv i16 %a, -64
+  ret i16 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i16 %a, -64
+}
+
+define i8 @fun6(i8 %a) {
+  %r = sdiv i8 %a, 64
+  ret i8 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i8 %a, 64
+}
+
+define i8 @fun7(i8 %a) {
+  %r = sdiv i8 %a, -128
+  ret i8 %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv i8 %a, -128
+}
+
+
+; Vector sdiv
+
+define <2 x i64> @fun8(<2 x i64> %a) {
+  %r = sdiv <2 x i64> %a, <i64 2, i64 2>
+  ret <2 x i64> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <2 x i64> %a, <i64 2, i64 2>
+}
+
+define <2 x i64> @fun9(<2 x i64> %a) {
+  %r = sdiv <2 x i64> %a, <i64 -4, i64 -4>
+  ret <2 x i64> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <2 x i64> %a, <i64 -4, i64 -4>
+}
+
+define <4 x i32> @fun10(<4 x i32> %a) {
+  %r = sdiv <4 x i32> %a, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <4 x i32> %a, <i32 8, i32 8, i32 8, i32 8>
+}
+
+define <4 x i32> @fun11(<4 x i32> %a) {
+  %r = sdiv <4 x i32> %a, <i32 -16, i32 -16, i32 -16, i32 -16>
+  ret <4 x i32> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <4 x i32> %a, <i32 -16
+}
+
+define <8 x i16> @fun12(<8 x i16> %a) {
+  %r = sdiv <8 x i16> %a, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+  ret <8 x i16> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <8 x i16> %a, <i16 32
+}
+
+define <8 x i16> @fun13(<8 x i16> %a) {
+  %r = sdiv <8 x i16> %a, <i16 -64, i16 -64, i16 -64, i16 -64, i16 -64, i16 -64, i16 -64, i16 -64>
+  ret <8 x i16> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <8 x i16> %a, <i16 -64
+}
+
+define <16 x i8> @fun14(<16 x i8> %a) {
+  %r = sdiv <16 x i8> %a, <i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64>
+  ret <16 x i8> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <16 x i8> %a, <i8 64
+}
+
+define <16 x i8> @fun15(<16 x i8> %a) {
+  %r = sdiv <16 x i8> %a, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+  ret <16 x i8> %r
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %r = sdiv <16 x i8> %a, <i8 -128
+}
+
+; Scalar udiv
+
+define i64 @fun16(i64 %a) {
+  %r = udiv i64 %a, 2
+  ret i64 %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv i64 %a, 2
+}
+
+define i32 @fun17(i32 %a) {
+  %r = udiv i32 %a, 8
+  ret i32 %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv i32 %a, 8
+}
+
+define i16 @fun18(i16 %a) {
+  %r = udiv i16 %a, 32
+  ret i16 %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv i16 %a, 32
+}
+
+define i8 @fun19(i8 %a) {
+  %r = udiv i8 %a, 128
+  ret i8 %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv i8 %a, -128
+}
+
+; Vector udiv
+
+define <2 x i64> @fun20(<2 x i64> %a) {
+  %r = udiv <2 x i64> %a, <i64 2, i64 2>
+  ret <2 x i64> %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv <2 x i64> %a, <i64 2
+}
+
+define <4 x i32> @fun21(<4 x i32> %a) {
+  %r = udiv <4 x i32> %a, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv <4 x i32> %a, <i32 8
+}
+
+define <8 x i16> @fun22(<8 x i16> %a) {
+  %r = udiv <8 x i16> %a, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+  ret <8 x i16> %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv <8 x i16> %a, <i16 32
+}
+
+define <16 x i8> @fun23(<16 x i8> %a) {
+  %r = udiv <16 x i8> %a, <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>
+  ret <16 x i8> %r
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %r = udiv <16 x i8> %a, <i8 -128
+}
diff --git a/test/Analysis/CostModel/X86/bitreverse.ll b/test/Analysis/CostModel/X86/bitreverse.ll
index 8d5e1421eb82..9321b7323b57 100644
--- a/test/Analysis/CostModel/X86/bitreverse.ll
+++ b/test/Analysis/CostModel/X86/bitreverse.ll
@@ -2,10 +2,14 @@
 ; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=SSE42
 ; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX
 ; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX2
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mcpu=knl -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX512 -check-prefix=AVX512F
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mcpu=skx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX512 -check-prefix=AVX512BW
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=SSE2
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=SSE42
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX512 -check-prefix=AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX512 -check-prefix=AVX512BW
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
 
@@ -64,12 +68,18 @@ declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
 declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
 declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>)
 
+declare <8 x i64> @llvm.bitreverse.v8i64(<8 x i64>)
+declare <16 x i32> @llvm.bitreverse.v16i32(<16 x i32>)
+declare <32 x i16> @llvm.bitreverse.v32i16(<32 x i16>)
+declare <64 x i8> @llvm.bitreverse.v64i8(<64 x i8>)
+
 define <2 x i64> @var_bitreverse_v2i64(<2 x i64> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v2i64':
 ; SSE2: Found an estimated cost of 29 for instruction:   %bitreverse
 ; SSE42: Found an estimated cost of 5 for instruction:   %bitreverse
 ; AVX: Found an estimated cost of 5 for instruction:   %bitreverse
 ; AVX2: Found an estimated cost of 5 for instruction:   %bitreverse
+; AVX512: Found an estimated cost of 5 for instruction:   %bitreverse
 ; XOP: Found an estimated cost of 1 for instruction:   %bitreverse
   %bitreverse = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
   ret <2 x i64> %bitreverse
@@ -81,17 +91,32 @@ define <4 x i64> @var_bitreverse_v4i64(<4 x i64> %a) {
 ; SSE42: Found an estimated cost of 10 for instruction:   %bitreverse
 ; AVX: Found an estimated cost of 12 for instruction:   %bitreverse
 ; AVX2: Found an estimated cost of 5 for instruction:   %bitreverse
+; AVX512: Found an estimated cost of 5 for instruction:   %bitreverse
 ; XOP: Found an estimated cost of 4 for instruction:   %bitreverse
   %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
   ret <4 x i64> %bitreverse
 }
 
+define <8 x i64> @var_bitreverse_v8i64(<8 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v8i64':
+; SSE2: Found an estimated cost of 116 for instruction:   %bitreverse
+; SSE42: Found an estimated cost of 20 for instruction:   %bitreverse
+; AVX: Found an estimated cost of 24 for instruction:   %bitreverse
+; AVX2: Found an estimated cost of 10 for instruction:   %bitreverse
+; AVX512F: Found an estimated cost of 36 for instruction:   %bitreverse
+; AVX512BW: Found an estimated cost of 5 for instruction:   %bitreverse
+; XOP: Found an estimated cost of 8 for instruction:   %bitreverse
+  %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a)
+  ret <8 x i64> %bitreverse
+}
+
 define <4 x i32> @var_bitreverse_v4i32(<4 x i32> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v4i32':
 ; SSE2: Found an estimated cost of 27 for instruction:   %bitreverse
 ; SSE42: Found an estimated cost of 5 for instruction:   %bitreverse
 ; AVX: Found an estimated cost of 5 for instruction:   %bitreverse
 ; AVX2: Found an estimated cost of 5 for instruction:   %bitreverse
+; AVX512: Found an estimated cost of 5 for instruction:   %bitreverse
 ; XOP: Found an estimated cost of 1 for instruction:   %bitreverse
   %bitreverse = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
   ret <4 x i32> %bitreverse
@@ -103,17 +128,32 @@ define <8 x i32> @var_bitreverse_v8i32(<8 x i32> %a) {
 ; SSE42: Found an estimated cost of 10 for instruction:   %bitreverse
 ; AVX: Found an estimated cost of 12 for instruction:   %bitreverse
 ; AVX2: Found an estimated cost of 5 for instruction:   %bitreverse
+; AVX512: Found an estimated cost of 5 for instruction:   %bitreverse
 ; XOP: Found an estimated cost of 4 for instruction:   %bitreverse
   %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
   ret <8 x i32> %bitreverse
 }
 
+define <16 x i32> @var_bitreverse_v16i32(<16 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v16i32':
+; SSE2: Found an estimated cost of 108 for instruction:   %bitreverse
+; SSE42: Found an estimated cost of 20 for instruction:   %bitreverse
+; AVX: Found an estimated cost of 24 for instruction:   %bitreverse
+; AVX2: Found an estimated cost of 10 for instruction:   %bitreverse
+; AVX512F: Found an estimated cost of 24 for instruction:   %bitreverse
+; AVX512BW: Found an estimated cost of 5 for instruction:   %bitreverse
+; XOP: Found an estimated cost of 8 for instruction:   %bitreverse
+  %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a)
+  ret <16 x i32> %bitreverse
+}
+
 define <8 x i16> @var_bitreverse_v8i16(<8 x i16> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v8i16':
 ; SSE2: Found an estimated cost of 27 for instruction:   %bitreverse
 ; SSE42: Found an estimated cost of 5 for instruction:   %bitreverse
 ; AVX: Found an estimated cost of 5 for instruction:   %bitreverse
 ; AVX2: Found an estimated cost of 5 for instruction:   %bitreverse
+; AVX512: Found an estimated cost of 5 for instruction:   %bitreverse
 ; XOP: Found an estimated cost of 1 for instruction:   %bitreverse
   %bitreverse = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
   ret <8 x i16> %bitreverse
@@ -125,17 +165,32 @@ define <16 x i16> @var_bitreverse_v16i16(<16 x i16> %a) {
 ; SSE42: Found an estimated cost of 10 for instruction:   %bitreverse
 ; AVX: Found an estimated cost of 12 for instruction:   %bitreverse
 ; AVX2: Found an estimated cost of 5 for instruction:   %bitreverse
+; AVX512: Found an estimated cost of 5 for instruction:   %bitreverse
 ; XOP: Found an estimated cost of 4 for instruction:   %bitreverse
   %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
   ret <16 x i16> %bitreverse
 }
 
+define <32 x i16> @var_bitreverse_v32i16(<32 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v32i16':
+; SSE2: Found an estimated cost of 108 for instruction:   %bitreverse
+; SSE42: Found an estimated cost of 20 for instruction:   %bitreverse
+; AVX: Found an estimated cost of 24 for instruction:   %bitreverse
+; AVX2: Found an estimated cost of 10 for instruction:   %bitreverse
+; AVX512F: Found an estimated cost of 10 for instruction:   %bitreverse
+; AVX512BW: Found an estimated cost of 5 for instruction:   %bitreverse
+; XOP: Found an estimated cost of 8 for instruction:   %bitreverse
+  %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a)
+  ret <32 x i16> %bitreverse
+}
+
 define <16 x i8> @var_bitreverse_v16i8(<16 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v16i8':
 ; SSE2: Found an estimated cost of 20 for instruction:   %bitreverse
 ; SSE42: Found an estimated cost of 5 for instruction:   %bitreverse
 ; AVX: Found an estimated cost of 5 for instruction:   %bitreverse
 ; AVX2: Found an estimated cost of 5 for instruction:   %bitreverse
+; AVX512: Found an estimated cost of 5 for instruction:   %bitreverse
 ; XOP: Found an estimated cost of 1 for instruction:   %bitreverse
   %bitreverse = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
   ret <16 x i8> %bitreverse
@@ -147,7 +202,21 @@ define <32 x i8> @var_bitreverse_v32i8(<32 x i8> %a) {
 ; SSE42: Found an estimated cost of 10 for instruction:   %bitreverse
 ; AVX: Found an estimated cost of 12 for instruction:   %bitreverse
 ; AVX2: Found an estimated cost of 5 for instruction:   %bitreverse
+; AVX512: Found an estimated cost of 5 for instruction:   %bitreverse
 ; XOP: Found an estimated cost of 4 for instruction:   %bitreverse
   %bitreverse = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a)
   ret <32 x i8> %bitreverse
 }
+
+define <64 x i8> @var_bitreverse_v64i8(<64 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v64i8':
+; SSE2: Found an estimated cost of 80 for instruction:   %bitreverse
+; SSE42: Found an estimated cost of 20 for instruction:   %bitreverse
+; AVX: Found an estimated cost of 24 for instruction:   %bitreverse
+; AVX2: Found an estimated cost of 10 for instruction:   %bitreverse
+; AVX512F: Found an estimated cost of 10 for instruction:   %bitreverse
+; AVX512BW: Found an estimated cost of 5 for instruction:   %bitreverse
+; XOP: Found an estimated cost of 8 for instruction:   %bitreverse
+  %bitreverse = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %a)
+  ret <64 x i8> %bitreverse
+}
diff --git a/test/Analysis/CostModel/X86/ctbits-cost.ll b/test/Analysis/CostModel/X86/ctbits-cost.ll
deleted file mode 100644
index aaf092c7b1d7..000000000000
--- a/test/Analysis/CostModel/X86/ctbits-cost.ll
+++ /dev/null
@@ -1,587 +0,0 @@
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
-
-; Verify the cost of scalar population count instructions.
-
-declare i64 @llvm.ctpop.i64(i64)
-declare i32 @llvm.ctpop.i32(i32)
-declare i16 @llvm.ctpop.i16(i16)
-declare  i8 @llvm.ctpop.i8(i8)
-
-define i64 @var_ctpop_i64(i64 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i64':
-; NOPOPCNT: Found an estimated cost of 4 for instruction:   %ctpop
-; POPCNT: Found an estimated cost of 1 for instruction:   %ctpop
-  %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
-  ret i64 %ctpop
-}
-
-define i32 @var_ctpop_i32(i32 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i32':
-; NOPOPCNT: Found an estimated cost of 4 for instruction:   %ctpop
-; POPCNT: Found an estimated cost of 1 for instruction:   %ctpop
-  %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
-  ret i32 %ctpop
-}
-
-define i16 @var_ctpop_i16(i16 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i16':
-; NOPOPCNT: Found an estimated cost of 4 for instruction:   %ctpop
-; POPCNT: Found an estimated cost of 1 for instruction:   %ctpop
-  %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
-  ret i16 %ctpop
-}
-
-define i8 @var_ctpop_i8(i8 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i8':
-; NOPOPCNT: Found an estimated cost of 4 for instruction:   %ctpop
-; POPCNT: Found an estimated cost of 1 for instruction:   %ctpop
-  %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
-  ret i8 %ctpop
-}
-
-; Verify the cost of vector population count instructions.
-
-declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
-declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
-declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
-declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
-
-declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
-declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
-declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
-declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
-
-define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v2i64':
-; SSE2: Found an estimated cost of 12 for instruction:   %ctpop
-; SSE42: Found an estimated cost of 7 for instruction:   %ctpop
-; AVX: Found an estimated cost of 7 for instruction:   %ctpop
-  %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
-  ret <2 x i64> %ctpop
-}
-
-define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i64':
-; SSE2: Found an estimated cost of 24 for instruction:   %ctpop
-; SSE42: Found an estimated cost of 14 for instruction:   %ctpop
-; AVX1: Found an estimated cost of 16 for instruction:   %ctpop
-; AVX2: Found an estimated cost of 7 for instruction:   %ctpop
-  %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
-  ret <4 x i64> %ctpop
-}
-
-define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i32':
-; SSE2: Found an estimated cost of 15 for instruction:   %ctpop
-; SSE42: Found an estimated cost of 11 for instruction:   %ctpop
-; AVX: Found an estimated cost of 11 for instruction:   %ctpop
-  %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
-  ret <4 x i32> %ctpop
-}
-
-define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i32':
-; SSE2: Found an estimated cost of 30 for instruction:   %ctpop
-; SSE42: Found an estimated cost of 22 for instruction:   %ctpop
-; AVX1: Found an estimated cost of 24 for instruction:   %ctpop
-; AVX2: Found an estimated cost of 11 for instruction:   %ctpop
-  %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
-  ret <8 x i32> %ctpop
-}
-
-define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i16':
-; SSE2: Found an estimated cost of 13 for instruction:   %ctpop
-; SSE42: Found an estimated cost of 9 for instruction:   %ctpop
-; AVX: Found an estimated cost of 9 for instruction:   %ctpop
-  %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
-  ret <8 x i16> %ctpop
-}
-
-define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i16':
-; SSE2: Found an estimated cost of 26 for instruction:   %ctpop
-; SSE42: Found an estimated cost of 18 for instruction:   %ctpop
-; AVX1: Found an estimated cost of 20 for instruction:   %ctpop
-; AVX2: Found an estimated cost of 9 for instruction:   %ctpop
-  %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
-  ret <16 x i16> %ctpop
-}
-
-define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i8':
-; SSE2: Found an estimated cost of 10 for instruction:   %ctpop
-; SSE42: Found an estimated cost of 6 for instruction:   %ctpop
-; AVX: Found an estimated cost of 6 for instruction:   %ctpop
-  %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
-  ret <16 x i8> %ctpop
-}
-
-define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v32i8':
-; SSE2: Found an estimated cost of 20 for instruction:   %ctpop
-; SSE42: Found an estimated cost of 12 for instruction:   %ctpop
-; AVX1: Found an estimated cost of 14 for instruction:   %ctpop
-; AVX2: Found an estimated cost of 6 for instruction:   %ctpop
-  %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
-  ret <32 x i8> %ctpop
-}
-
-; Verify the cost of scalar leading zero count instructions.
-
-declare i64 @llvm.ctlz.i64(i64, i1)
-declare i32 @llvm.ctlz.i32(i32, i1)
-declare i16 @llvm.ctlz.i16(i16, i1)
-declare  i8 @llvm.ctlz.i8(i8, i1)
-
-define i64 @var_ctlz_i64(i64 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64':
-; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
-  %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 0)
-  ret i64 %ctlz
-}
-
-define i64 @var_ctlz_i64u(i64 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64u':
-; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
-  %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 1)
-  ret i64 %ctlz
-}
-
-define i32 @var_ctlz_i32(i32 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32':
-; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
-  %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 0)
-  ret i32 %ctlz
-}
-
-define i32 @var_ctlz_i32u(i32 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32u':
-; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
-  %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 1)
-  ret i32 %ctlz
-}
-
-define i16 @var_ctlz_i16(i16 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16':
-; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
-  %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 0)
-  ret i16 %ctlz
-}
-
-define i16 @var_ctlz_i16u(i16 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16u':
-; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
-  %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 1)
-  ret i16 %ctlz
-}
-
-define i8 @var_ctlz_i8(i8 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8':
-; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
-  %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 0)
-  ret i8 %ctlz
-}
-
-define i8 @var_ctlz_i8u(i8 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8u':
-; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
-  %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 1)
-  ret i8 %ctlz
-}
-
-; Verify the cost of vector leading zero count instructions.
-
-declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
-declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
-declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
-declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)
-
-declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
-declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
-declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
-declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
-
-define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64':
-; SSE2: Found an estimated cost of 25 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 23 for instruction:   %ctlz
-; AVX: Found an estimated cost of 23 for instruction:   %ctlz
-  %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0)
-  ret <2 x i64> %ctlz
-}
-
-define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64u':
-; SSE2: Found an estimated cost of 25 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 23 for instruction:   %ctlz
-; AVX: Found an estimated cost of 23 for instruction:   %ctlz
-  %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1)
-  ret <2 x i64> %ctlz
-}
-
-define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64':
-; SSE2: Found an estimated cost of 50 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 46 for instruction:   %ctlz
-; AVX1: Found an estimated cost of 48 for instruction:   %ctlz
-; AVX2: Found an estimated cost of 23 for instruction:   %ctlz
-  %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0)
-  ret <4 x i64> %ctlz
-}
-
-define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64u':
-; SSE2: Found an estimated cost of 50 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 46 for instruction:   %ctlz
-; AVX1: Found an estimated cost of 48 for instruction:   %ctlz
-; AVX2: Found an estimated cost of 23 for instruction:   %ctlz
-  %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1)
-  ret <4 x i64> %ctlz
-}
-
-define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32':
-; SSE2: Found an estimated cost of 26 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 18 for instruction:   %ctlz
-; AVX: Found an estimated cost of 18 for instruction:   %ctlz
-  %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0)
-  ret <4 x i32> %ctlz
-}
-
-define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32u':
-; SSE2: Found an estimated cost of 26 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 18 for instruction:   %ctlz
-; AVX: Found an estimated cost of 18 for instruction:   %ctlz
-  %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1)
-  ret <4 x i32> %ctlz
-}
-
-define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32':
-; SSE2: Found an estimated cost of 52 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 36 for instruction:   %ctlz
-; AVX1: Found an estimated cost of 38 for instruction:   %ctlz
-; AVX2: Found an estimated cost of 18 for instruction:   %ctlz
-  %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 0)
-  ret <8 x i32> %ctlz
-}
-
-define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32u':
-; SSE2: Found an estimated cost of 52 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 36 for instruction:   %ctlz
-; AVX1: Found an estimated cost of 38 for instruction:   %ctlz
-; AVX2: Found an estimated cost of 18 for instruction:   %ctlz
-  %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 1)
-  ret <8 x i32> %ctlz
-}
-
-define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16':
-; SSE2: Found an estimated cost of 20 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 14 for instruction:   %ctlz
-; AVX: Found an estimated cost of 14 for instruction:   %ctlz
-  %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0)
-  ret <8 x i16> %ctlz
-}
-
-define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16u':
-; SSE2: Found an estimated cost of 20 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 14 for instruction:   %ctlz
-; AVX: Found an estimated cost of 14 for instruction:   %ctlz
-  %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1)
-  ret <8 x i16> %ctlz
-}
-
-define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16':
-; SSE2: Found an estimated cost of 40 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 28 for instruction:   %ctlz
-; AVX1: Found an estimated cost of 30 for instruction:   %ctlz
-; AVX2: Found an estimated cost of 14 for instruction:   %ctlz
-  %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0)
-  ret <16 x i16> %ctlz
-}
-
-define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16u':
-; SSE2: Found an estimated cost of 40 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 28 for instruction:   %ctlz
-; AVX1: Found an estimated cost of 30 for instruction:   %ctlz
-; AVX2: Found an estimated cost of 14 for instruction:   %ctlz
-  %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1)
-  ret <16 x i16> %ctlz
-}
-
-define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8':
-; SSE2: Found an estimated cost of 17 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 9 for instruction:   %ctlz
-; AVX: Found an estimated cost of 9 for instruction:   %ctlz
-  %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0)
-  ret <16 x i8> %ctlz
-}
-
-define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8u':
-; SSE2: Found an estimated cost of 17 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 9 for instruction:   %ctlz
-; AVX: Found an estimated cost of 9 for instruction:   %ctlz
-  %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1)
-  ret <16 x i8> %ctlz
-}
-
-define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8':
-; SSE2: Found an estimated cost of 34 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 18 for instruction:   %ctlz
-; AVX1: Found an estimated cost of 20 for instruction:   %ctlz
-; AVX2: Found an estimated cost of 9 for instruction:   %ctlz
-  %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0)
-  ret <32 x i8> %ctlz
-}
-
-define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8u':
-; SSE2: Found an estimated cost of 34 for instruction:   %ctlz
-; SSE42: Found an estimated cost of 18 for instruction:   %ctlz
-; AVX1: Found an estimated cost of 20 for instruction:   %ctlz
-; AVX2: Found an estimated cost of 9 for instruction:   %ctlz
-  %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1)
-  ret <32 x i8> %ctlz
-}
-
-; Verify the cost of scalar trailing zero count instructions.
-
-declare i64 @llvm.cttz.i64(i64, i1)
-declare i32 @llvm.cttz.i32(i32, i1)
-declare i16 @llvm.cttz.i16(i16, i1)
-declare  i8 @llvm.cttz.i8(i8, i1)
-
-define i64 @var_cttz_i64(i64 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64':
-; CHECK: Found an estimated cost of 1 for instruction:   %cttz
-  %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 0)
-  ret i64 %cttz
-}
-
-define i64 @var_cttz_i64u(i64 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64u':
-; CHECK: Found an estimated cost of 1 for instruction:   %cttz
-  %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 1)
-  ret i64 %cttz
-}
-
-define i32 @var_cttz_i32(i32 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32':
-; CHECK: Found an estimated cost of 1 for instruction:   %cttz
-  %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0)
-  ret i32 %cttz
-}
-
-define i32 @var_cttz_i32u(i32 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32u':
-; CHECK: Found an estimated cost of 1 for instruction:   %cttz
-  %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 1)
-  ret i32 %cttz
-}
-
-define i16 @var_cttz_i16(i16 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16':
-; CHECK: Found an estimated cost of 1 for instruction:   %cttz
-  %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 0)
-  ret i16 %cttz
-}
-
-define i16 @var_cttz_i16u(i16 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16u':
-; CHECK: Found an estimated cost of 1 for instruction:   %cttz
-  %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 1)
-  ret i16 %cttz
-}
-
-define i8 @var_cttz_i8(i8 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8':
-; CHECK: Found an estimated cost of 1 for instruction:   %cttz
-  %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 0)
-  ret i8 %cttz
-}
-
-define i8 @var_cttz_i8u(i8 %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8u':
-; CHECK: Found an estimated cost of 1 for instruction:   %cttz
-  %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 1)
-  ret i8 %cttz
-}
-
-; Verify the cost of vector trailing zero count instructions.
-
-declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
-declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
-declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
-declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
-
-declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
-declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
-declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
-declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)
-
-define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64':
-; SSE2: Found an estimated cost of 14 for instruction:   %cttz
-; SSE42: Found an estimated cost of 10 for instruction:   %cttz
-; AVX: Found an estimated cost of 10 for instruction:   %cttz
-  %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
-  ret <2 x i64> %cttz
-}
-
-define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64u':
-; SSE2: Found an estimated cost of 14 for instruction:   %cttz
-; SSE42: Found an estimated cost of 10 for instruction:   %cttz
-; AVX: Found an estimated cost of 10 for instruction:   %cttz
-  %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
-  ret <2 x i64> %cttz
-}
-
-define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64':
-; SSE2: Found an estimated cost of 28 for instruction:   %cttz
-; SSE42: Found an estimated cost of 20 for instruction:   %cttz
-; AVX1: Found an estimated cost of 22 for instruction:   %cttz
-; AVX2: Found an estimated cost of 10 for instruction:   %cttz
-  %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
-  ret <4 x i64> %cttz
-}
-
-define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64u':
-; SSE2: Found an estimated cost of 28 for instruction:   %cttz
-; SSE42: Found an estimated cost of 20 for instruction:   %cttz
-; AVX1: Found an estimated cost of 22 for instruction:   %cttz
-; AVX2: Found an estimated cost of 10 for instruction:   %cttz
-  %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
-  ret <4 x i64> %cttz
-}
-
-define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32':
-; SSE2: Found an estimated cost of 18 for instruction:   %cttz
-; SSE42: Found an estimated cost of 14 for instruction:   %cttz
-; AVX: Found an estimated cost of 14 for instruction:   %cttz
-  %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
-  ret <4 x i32> %cttz
-}
-
-define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32u':
-; SSE2: Found an estimated cost of 18 for instruction:   %cttz
-; SSE42: Found an estimated cost of 14 for instruction:   %cttz
-; AVX: Found an estimated cost of 14 for instruction:   %cttz
-  %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
-  ret <4 x i32> %cttz
-}
-
-define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32':
-; SSE2: Found an estimated cost of 36 for instruction:   %cttz
-; SSE42: Found an estimated cost of 28 for instruction:   %cttz
-; AVX1: Found an estimated cost of 30 for instruction:   %cttz
-; AVX2: Found an estimated cost of 14 for instruction:   %cttz
-  %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
-  ret <8 x i32> %cttz
-}
-
-define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32u':
-; SSE2: Found an estimated cost of 36 for instruction:   %cttz
-; SSE42: Found an estimated cost of 28 for instruction:   %cttz
-; AVX1: Found an estimated cost of 30 for instruction:   %cttz
-; AVX2: Found an estimated cost of 14 for instruction:   %cttz
-  %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
-  ret <8 x i32> %cttz
-}
-
-define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16':
-; SSE2: Found an estimated cost of 16 for instruction:   %cttz
-; SSE42: Found an estimated cost of 12 for instruction:   %cttz
-; AVX: Found an estimated cost of 12 for instruction:   %cttz
-  %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
-  ret <8 x i16> %cttz
-}
-
-define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16u':
-; SSE2: Found an estimated cost of 16 for instruction:   %cttz
-; SSE42: Found an estimated cost of 12 for instruction:   %cttz
-; AVX: Found an estimated cost of 12 for instruction:   %cttz
-  %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
-  ret <8 x i16> %cttz
-}
-
-define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16':
-; SSE2: Found an estimated cost of 32 for instruction:   %cttz
-; SSE42: Found an estimated cost of 24 for instruction:   %cttz
-; AVX1: Found an estimated cost of 26 for instruction:   %cttz
-; AVX2: Found an estimated cost of 12 for instruction:   %cttz
-  %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
-  ret <16 x i16> %cttz
-}
-
-define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16u':
-; SSE2: Found an estimated cost of 32 for instruction:   %cttz
-; SSE42: Found an estimated cost of 24 for instruction:   %cttz
-; AVX1: Found an estimated cost of 26 for instruction:   %cttz
-; AVX2: Found an estimated cost of 12 for instruction:   %cttz
-  %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
-  ret <16 x i16> %cttz
-}
-
-define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8':
-; SSE2: Found an estimated cost of 13 for instruction:   %cttz
-; SSE42: Found an estimated cost of 9 for instruction:   %cttz
-; AVX: Found an estimated cost of 9 for instruction:   %cttz
-  %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
-  ret <16 x i8> %cttz
-}
-
-define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8u':
-; SSE2: Found an estimated cost of 13 for instruction:   %cttz
-; SSE42: Found an estimated cost of 9 for instruction:   %cttz
-; AVX: Found an estimated cost of 9 for instruction:   %cttz
-  %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
-  ret <16 x i8> %cttz
-}
-
-define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8':
-; SSE2: Found an estimated cost of 26 for instruction:   %cttz
-; SSE42: Found an estimated cost of 18 for instruction:   %cttz
-; AVX1: Found an estimated cost of 20 for instruction:   %cttz
-; AVX2: Found an estimated cost of 9 for instruction:   %cttz
-  %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
-  ret <32 x i8> %cttz
-}
-
-define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
-; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8u':
-; SSE2: Found an estimated cost of 26 for instruction:   %cttz
-; SSE42: Found an estimated cost of 18 for instruction:   %cttz
-; AVX1: Found an estimated cost of 20 for instruction:   %cttz
-; AVX2: Found an estimated cost of 9 for instruction:   %cttz
-  %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
-  ret <32 x i8> %cttz
-}
diff --git a/test/Analysis/CostModel/X86/ctlz.ll b/test/Analysis/CostModel/X86/ctlz.ll
new file mode 100644
index 000000000000..2c97da15aee5
--- /dev/null
+++ b/test/Analysis/CostModel/X86/ctlz.ll
@@ -0,0 +1,233 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
+
+; Verify the cost of scalar leading zero count instructions.
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare  i8 @llvm.ctlz.i8(i8, i1)
+
+define i64 @var_ctlz_i64(i64 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64':
+; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
+  %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 0)
+  ret i64 %ctlz
+}
+
+define i64 @var_ctlz_i64u(i64 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64u':
+; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
+  %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 1)
+  ret i64 %ctlz
+}
+
+define i32 @var_ctlz_i32(i32 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32':
+; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 0)
+  ret i32 %ctlz
+}
+
+define i32 @var_ctlz_i32u(i32 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32u':
+; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 1)
+  ret i32 %ctlz
+}
+
+define i16 @var_ctlz_i16(i16 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16':
+; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
+  %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 0)
+  ret i16 %ctlz
+}
+
+define i16 @var_ctlz_i16u(i16 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16u':
+; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
+  %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 1)
+  ret i16 %ctlz
+}
+
+define i8 @var_ctlz_i8(i8 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8':
+; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
+  %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 0)
+  ret i8 %ctlz
+}
+
+define i8 @var_ctlz_i8u(i8 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8u':
+; CHECK: Found an estimated cost of 1 for instruction:   %ctlz
+  %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 1)
+  ret i8 %ctlz
+}
+
+; Verify the cost of vector leading zero count instructions.
+
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)
+
+declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
+declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
+declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
+declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
+
+define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64':
+; SSE2: Found an estimated cost of 25 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 23 for instruction:   %ctlz
+; AVX: Found an estimated cost of 23 for instruction:   %ctlz
+  %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0)
+  ret <2 x i64> %ctlz
+}
+
+define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64u':
+; SSE2: Found an estimated cost of 25 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 23 for instruction:   %ctlz
+; AVX: Found an estimated cost of 23 for instruction:   %ctlz
+  %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1)
+  ret <2 x i64> %ctlz
+}
+
+define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64':
+; SSE2: Found an estimated cost of 50 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 46 for instruction:   %ctlz
+; AVX1: Found an estimated cost of 48 for instruction:   %ctlz
+; AVX2: Found an estimated cost of 23 for instruction:   %ctlz
+  %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0)
+  ret <4 x i64> %ctlz
+}
+
+define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64u':
+; SSE2: Found an estimated cost of 50 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 46 for instruction:   %ctlz
+; AVX1: Found an estimated cost of 48 for instruction:   %ctlz
+; AVX2: Found an estimated cost of 23 for instruction:   %ctlz
+  %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1)
+  ret <4 x i64> %ctlz
+}
+
+define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32':
+; SSE2: Found an estimated cost of 26 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 18 for instruction:   %ctlz
+; AVX: Found an estimated cost of 18 for instruction:   %ctlz
+  %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0)
+  ret <4 x i32> %ctlz
+}
+
+define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32u':
+; SSE2: Found an estimated cost of 26 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 18 for instruction:   %ctlz
+; AVX: Found an estimated cost of 18 for instruction:   %ctlz
+  %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1)
+  ret <4 x i32> %ctlz
+}
+
+define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32':
+; SSE2: Found an estimated cost of 52 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 36 for instruction:   %ctlz
+; AVX1: Found an estimated cost of 38 for instruction:   %ctlz
+; AVX2: Found an estimated cost of 18 for instruction:   %ctlz
+  %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 0)
+  ret <8 x i32> %ctlz
+}
+
+define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32u':
+; SSE2: Found an estimated cost of 52 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 36 for instruction:   %ctlz
+; AVX1: Found an estimated cost of 38 for instruction:   %ctlz
+; AVX2: Found an estimated cost of 18 for instruction:   %ctlz
+  %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 1)
+  ret <8 x i32> %ctlz
+}
+
+define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16':
+; SSE2: Found an estimated cost of 20 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 14 for instruction:   %ctlz
+; AVX: Found an estimated cost of 14 for instruction:   %ctlz
+  %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0)
+  ret <8 x i16> %ctlz
+}
+
+define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16u':
+; SSE2: Found an estimated cost of 20 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 14 for instruction:   %ctlz
+; AVX: Found an estimated cost of 14 for instruction:   %ctlz
+  %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1)
+  ret <8 x i16> %ctlz
+}
+
+define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16':
+; SSE2: Found an estimated cost of 40 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 28 for instruction:   %ctlz
+; AVX1: Found an estimated cost of 30 for instruction:   %ctlz
+; AVX2: Found an estimated cost of 14 for instruction:   %ctlz
+  %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0)
+  ret <16 x i16> %ctlz
+}
+
+define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16u':
+; SSE2: Found an estimated cost of 40 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 28 for instruction:   %ctlz
+; AVX1: Found an estimated cost of 30 for instruction:   %ctlz
+; AVX2: Found an estimated cost of 14 for instruction:   %ctlz
+  %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1)
+  ret <16 x i16> %ctlz
+}
+
+define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8':
+; SSE2: Found an estimated cost of 17 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 9 for instruction:   %ctlz
+; AVX: Found an estimated cost of 9 for instruction:   %ctlz
+  %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0)
+  ret <16 x i8> %ctlz
+}
+
+define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8u':
+; SSE2: Found an estimated cost of 17 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 9 for instruction:   %ctlz
+; AVX: Found an estimated cost of 9 for instruction:   %ctlz
+  %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1)
+  ret <16 x i8> %ctlz
+}
+
+define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8':
+; SSE2: Found an estimated cost of 34 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 18 for instruction:   %ctlz
+; AVX1: Found an estimated cost of 20 for instruction:   %ctlz
+; AVX2: Found an estimated cost of 9 for instruction:   %ctlz
+  %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0)
+  ret <32 x i8> %ctlz
+}
+
+define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8u':
+; SSE2: Found an estimated cost of 34 for instruction:   %ctlz
+; SSE42: Found an estimated cost of 18 for instruction:   %ctlz
+; AVX1: Found an estimated cost of 20 for instruction:   %ctlz
+; AVX2: Found an estimated cost of 9 for instruction:   %ctlz
+  %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1)
+  ret <32 x i8> %ctlz
+}
diff --git a/test/Analysis/CostModel/X86/ctpop.ll b/test/Analysis/CostModel/X86/ctpop.ll
new file mode 100644
index 000000000000..f072cbaec492
--- /dev/null
+++ b/test/Analysis/CostModel/X86/ctpop.ll
@@ -0,0 +1,133 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
+
+; Verify the cost of scalar population count instructions.
+
+declare i64 @llvm.ctpop.i64(i64)
+declare i32 @llvm.ctpop.i32(i32)
+declare i16 @llvm.ctpop.i16(i16)
+declare  i8 @llvm.ctpop.i8(i8)
+
+define i64 @var_ctpop_i64(i64 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i64':
+; NOPOPCNT: Found an estimated cost of 4 for instruction:   %ctpop
+; POPCNT: Found an estimated cost of 1 for instruction:   %ctpop
+  %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
+  ret i64 %ctpop
+}
+
+define i32 @var_ctpop_i32(i32 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i32':
+; NOPOPCNT: Found an estimated cost of 4 for instruction:   %ctpop
+; POPCNT: Found an estimated cost of 1 for instruction:   %ctpop
+  %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
+  ret i32 %ctpop
+}
+
+define i16 @var_ctpop_i16(i16 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i16':
+; NOPOPCNT: Found an estimated cost of 4 for instruction:   %ctpop
+; POPCNT: Found an estimated cost of 1 for instruction:   %ctpop
+  %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+  ret i16 %ctpop
+}
+
+define i8 @var_ctpop_i8(i8 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i8':
+; NOPOPCNT: Found an estimated cost of 4 for instruction:   %ctpop
+; POPCNT: Found an estimated cost of 1 for instruction:   %ctpop
+  %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
+  ret i8 %ctpop
+}
+
+; Verify the cost of vector population count instructions.
+
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
+
+declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
+declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
+declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
+declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
+
+define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v2i64':
+; SSE2: Found an estimated cost of 12 for instruction:   %ctpop
+; SSE42: Found an estimated cost of 7 for instruction:   %ctpop
+; AVX: Found an estimated cost of 7 for instruction:   %ctpop
+  %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+  ret <2 x i64> %ctpop
+}
+
+define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i64':
+; SSE2: Found an estimated cost of 24 for instruction:   %ctpop
+; SSE42: Found an estimated cost of 14 for instruction:   %ctpop
+; AVX1: Found an estimated cost of 16 for instruction:   %ctpop
+; AVX2: Found an estimated cost of 7 for instruction:   %ctpop
+  %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
+  ret <4 x i64> %ctpop
+}
+
+define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i32':
+; SSE2: Found an estimated cost of 15 for instruction:   %ctpop
+; SSE42: Found an estimated cost of 11 for instruction:   %ctpop
+; AVX: Found an estimated cost of 11 for instruction:   %ctpop
+  %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+  ret <4 x i32> %ctpop
+}
+
+define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i32':
+; SSE2: Found an estimated cost of 30 for instruction:   %ctpop
+; SSE42: Found an estimated cost of 22 for instruction:   %ctpop
+; AVX1: Found an estimated cost of 24 for instruction:   %ctpop
+; AVX2: Found an estimated cost of 11 for instruction:   %ctpop
+  %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
+  ret <8 x i32> %ctpop
+}
+
+define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i16':
+; SSE2: Found an estimated cost of 13 for instruction:   %ctpop
+; SSE42: Found an estimated cost of 9 for instruction:   %ctpop
+; AVX: Found an estimated cost of 9 for instruction:   %ctpop
+  %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+  ret <8 x i16> %ctpop
+}
+
+define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i16':
+; SSE2: Found an estimated cost of 26 for instruction:   %ctpop
+; SSE42: Found an estimated cost of 18 for instruction:   %ctpop
+; AVX1: Found an estimated cost of 20 for instruction:   %ctpop
+; AVX2: Found an estimated cost of 9 for instruction:   %ctpop
+  %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
+  ret <16 x i16> %ctpop
+}
+
+define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i8':
+; SSE2: Found an estimated cost of 10 for instruction:   %ctpop
+; SSE42: Found an estimated cost of 6 for instruction:   %ctpop
+; AVX: Found an estimated cost of 6 for instruction:   %ctpop
+  %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+  ret <16 x i8> %ctpop
+}
+
+define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v32i8':
+; SSE2: Found an estimated cost of 20 for instruction:   %ctpop
+; SSE42: Found an estimated cost of 12 for instruction:   %ctpop
+; AVX1: Found an estimated cost of 14 for instruction:   %ctpop
+; AVX2: Found an estimated cost of 6 for instruction:   %ctpop
+  %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
+  ret <32 x i8> %ctpop
+}
diff --git a/test/Analysis/CostModel/X86/cttz.ll b/test/Analysis/CostModel/X86/cttz.ll
new file mode 100644
index 000000000000..5d3c59b60232
--- /dev/null
+++ b/test/Analysis/CostModel/X86/cttz.ll
@@ -0,0 +1,233 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
+
+; Verify the cost of scalar trailing zero count instructions.
+
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare  i8 @llvm.cttz.i8(i8, i1)
+
+define i64 @var_cttz_i64(i64 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64':
+; CHECK: Found an estimated cost of 1 for instruction:   %cttz
+  %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 0)
+  ret i64 %cttz
+}
+
+define i64 @var_cttz_i64u(i64 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64u':
+; CHECK: Found an estimated cost of 1 for instruction:   %cttz
+  %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 1)
+  ret i64 %cttz
+}
+
+define i32 @var_cttz_i32(i32 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32':
+; CHECK: Found an estimated cost of 1 for instruction:   %cttz
+  %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0)
+  ret i32 %cttz
+}
+
+define i32 @var_cttz_i32u(i32 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32u':
+; CHECK: Found an estimated cost of 1 for instruction:   %cttz
+  %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 1)
+  ret i32 %cttz
+}
+
+define i16 @var_cttz_i16(i16 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16':
+; CHECK: Found an estimated cost of 1 for instruction:   %cttz
+  %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 0)
+  ret i16 %cttz
+}
+
+define i16 @var_cttz_i16u(i16 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16u':
+; CHECK: Found an estimated cost of 1 for instruction:   %cttz
+  %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 1)
+  ret i16 %cttz
+}
+
+define i8 @var_cttz_i8(i8 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8':
+; CHECK: Found an estimated cost of 1 for instruction:   %cttz
+  %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 0)
+  ret i8 %cttz
+}
+
+define i8 @var_cttz_i8u(i8 %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8u':
+; CHECK: Found an estimated cost of 1 for instruction:   %cttz
+  %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 1)
+  ret i8 %cttz
+}
+
+; Verify the cost of vector trailing zero count instructions.
+
+declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
+declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
+declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
+declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
+
+declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
+declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
+declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
+declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)
+
+define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64':
+; SSE2: Found an estimated cost of 14 for instruction:   %cttz
+; SSE42: Found an estimated cost of 10 for instruction:   %cttz
+; AVX: Found an estimated cost of 10 for instruction:   %cttz
+  %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
+  ret <2 x i64> %cttz
+}
+
+define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64u':
+; SSE2: Found an estimated cost of 14 for instruction:   %cttz
+; SSE42: Found an estimated cost of 10 for instruction:   %cttz
+; AVX: Found an estimated cost of 10 for instruction:   %cttz
+  %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
+  ret <2 x i64> %cttz
+}
+
+define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64':
+; SSE2: Found an estimated cost of 28 for instruction:   %cttz
+; SSE42: Found an estimated cost of 20 for instruction:   %cttz
+; AVX1: Found an estimated cost of 22 for instruction:   %cttz
+; AVX2: Found an estimated cost of 10 for instruction:   %cttz
+  %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
+  ret <4 x i64> %cttz
+}
+
+define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64u':
+; SSE2: Found an estimated cost of 28 for instruction:   %cttz
+; SSE42: Found an estimated cost of 20 for instruction:   %cttz
+; AVX1: Found an estimated cost of 22 for instruction:   %cttz
+; AVX2: Found an estimated cost of 10 for instruction:   %cttz
+  %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
+  ret <4 x i64> %cttz
+}
+
+define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32':
+; SSE2: Found an estimated cost of 18 for instruction:   %cttz
+; SSE42: Found an estimated cost of 14 for instruction:   %cttz
+; AVX: Found an estimated cost of 14 for instruction:   %cttz
+  %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
+  ret <4 x i32> %cttz
+}
+
+define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32u':
+; SSE2: Found an estimated cost of 18 for instruction:   %cttz
+; SSE42: Found an estimated cost of 14 for instruction:   %cttz
+; AVX: Found an estimated cost of 14 for instruction:   %cttz
+  %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
+  ret <4 x i32> %cttz
+}
+
+define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32':
+; SSE2: Found an estimated cost of 36 for instruction:   %cttz
+; SSE42: Found an estimated cost of 28 for instruction:   %cttz
+; AVX1: Found an estimated cost of 30 for instruction:   %cttz
+; AVX2: Found an estimated cost of 14 for instruction:   %cttz
+  %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
+  ret <8 x i32> %cttz
+}
+
+define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32u':
+; SSE2: Found an estimated cost of 36 for instruction:   %cttz
+; SSE42: Found an estimated cost of 28 for instruction:   %cttz
+; AVX1: Found an estimated cost of 30 for instruction:   %cttz
+; AVX2: Found an estimated cost of 14 for instruction:   %cttz
+  %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
+  ret <8 x i32> %cttz
+}
+
+define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16':
+; SSE2: Found an estimated cost of 16 for instruction:   %cttz
+; SSE42: Found an estimated cost of 12 for instruction:   %cttz
+; AVX: Found an estimated cost of 12 for instruction:   %cttz
+  %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
+  ret <8 x i16> %cttz
+}
+
+define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16u':
+; SSE2: Found an estimated cost of 16 for instruction:   %cttz
+; SSE42: Found an estimated cost of 12 for instruction:   %cttz
+; AVX: Found an estimated cost of 12 for instruction:   %cttz
+  %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
+  ret <8 x i16> %cttz
+}
+
+define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16':
+; SSE2: Found an estimated cost of 32 for instruction:   %cttz
+; SSE42: Found an estimated cost of 24 for instruction:   %cttz
+; AVX1: Found an estimated cost of 26 for instruction:   %cttz
+; AVX2: Found an estimated cost of 12 for instruction:   %cttz
+  %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
+  ret <16 x i16> %cttz
+}
+
+define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16u':
+; SSE2: Found an estimated cost of 32 for instruction:   %cttz
+; SSE42: Found an estimated cost of 24 for instruction:   %cttz
+; AVX1: Found an estimated cost of 26 for instruction:   %cttz
+; AVX2: Found an estimated cost of 12 for instruction:   %cttz
+  %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
+  ret <16 x i16> %cttz
+}
+
+define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8':
+; SSE2: Found an estimated cost of 13 for instruction:   %cttz
+; SSE42: Found an estimated cost of 9 for instruction:   %cttz
+; AVX: Found an estimated cost of 9 for instruction:   %cttz
+  %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
+  ret <16 x i8> %cttz
+}
+
+define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8u':
+; SSE2: Found an estimated cost of 13 for instruction:   %cttz
+; SSE42: Found an estimated cost of 9 for instruction:   %cttz
+; AVX: Found an estimated cost of 9 for instruction:   %cttz
+  %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
+  ret <16 x i8> %cttz
+}
+
+define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8':
+; SSE2: Found an estimated cost of 26 for instruction:   %cttz
+; SSE42: Found an estimated cost of 18 for instruction:   %cttz
+; AVX1: Found an estimated cost of 20 for instruction:   %cttz
+; AVX2: Found an estimated cost of 9 for instruction:   %cttz
+  %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
+  ret <32 x i8> %cttz
+}
+
+define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8u':
+; SSE2: Found an estimated cost of 26 for instruction:   %cttz
+; SSE42: Found an estimated cost of 18 for instruction:   %cttz
+; AVX1: Found an estimated cost of 20 for instruction:   %cttz
+; AVX2: Found an estimated cost of 9 for instruction:   %cttz
+  %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
+  ret <32 x i8> %cttz
+}
diff --git a/test/CodeGen/AArch64/aarch64-addv.ll b/test/CodeGen/AArch64/aarch64-addv.ll
index 91797c062b88..e65992e9913d 100644
--- a/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/test/CodeGen/AArch64/aarch64-addv.ll
@@ -1,18 +1,16 @@
 ; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=generic | FileCheck %s
 
+; Function Attrs: nounwind readnone
+declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
+declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
+declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
+declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
+
 define i8 @add_B(<16 x i8>* %arr)  {
 ; CHECK-LABEL: add_B
 ; CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b
   %bin.rdx = load <16 x i8>, <16 x i8>* %arr
-  %rdx.shuf0 = shufflevector <16 x i8> %bin.rdx, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx0 = add <16 x i8> %bin.rdx, %rdx.shuf0
-  %rdx.shuf = shufflevector <16 x i8> %bin.rdx0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef >
-  %bin.rdx11 = add <16 x i8> %bin.rdx0, %rdx.shuf
-  %rdx.shuf12 = shufflevector <16 x i8> %bin.rdx11, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
-  %bin.rdx13 = add <16 x i8> %bin.rdx11, %rdx.shuf12
-  %rdx.shuf13 = shufflevector <16 x i8> %bin.rdx13, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
-  %bin.rdx14 = add <16 x i8> %bin.rdx13, %rdx.shuf13
-  %r = extractelement <16 x i8> %bin.rdx14, i32 0
+  %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %bin.rdx)
   ret i8 %r
 }
 
@@ -20,13 +18,7 @@ define i16 @add_H(<8 x i16>* %arr)  {
 ; CHECK-LABEL: add_H
 ; CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h
   %bin.rdx = load <8 x i16>, <8 x i16>* %arr
-  %rdx.shuf = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,i32 undef, i32 undef>
-  %bin.rdx11 = add <8 x i16> %bin.rdx, %rdx.shuf
-  %rdx.shuf12 = shufflevector <8 x i16> %bin.rdx11, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx13 = add <8 x i16> %bin.rdx11, %rdx.shuf12
-  %rdx.shuf13 = shufflevector <8 x i16> %bin.rdx13, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx14 = add <8 x i16> %bin.rdx13, %rdx.shuf13
-  %r = extractelement <8 x i16> %bin.rdx14, i32 0
+  %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %bin.rdx)
   ret i16 %r
 }
 
@@ -34,11 +26,7 @@ define i32 @add_S( <4 x i32>* %arr)  {
 ; CHECK-LABEL: add_S
 ; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
   %bin.rdx = load <4 x i32>, <4 x i32>* %arr
-  %rdx.shuf = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-  %bin.rdx11 = add <4 x i32> %bin.rdx, %rdx.shuf
-  %rdx.shuf12 = shufflevector <4 x i32> %bin.rdx11, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-  %bin.rdx13 = add <4 x i32> %bin.rdx11, %rdx.shuf12
-  %r = extractelement <4 x i32> %bin.rdx13, i32 0
+  %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %bin.rdx)
   ret i32 %r
 }
 
@@ -46,12 +34,12 @@ define i64 @add_D(<2 x i64>* %arr)  {
 ; CHECK-LABEL: add_D
 ; CHECK-NOT: addv
   %bin.rdx = load <2 x i64>, <2 x i64>* %arr
-  %rdx.shuf0 = shufflevector <2 x i64> %bin.rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
-  %bin.rdx0 = add <2 x i64> %bin.rdx, %rdx.shuf0
-  %r = extractelement <2 x i64> %bin.rdx0, i32 0
+  %r = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %bin.rdx)
   ret i64 %r
 }
 
+declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>)
+
 define i32 @oversized_ADDV_256(i8* noalias nocapture readonly %arg1, i8* noalias nocapture readonly %arg2) {
 ; CHECK-LABEL: oversized_ADDV_256
 ; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
@@ -66,33 +54,16 @@ entry:
   %7 = icmp slt <8 x i32> %6, zeroinitializer
   %8 = sub nsw <8 x i32> zeroinitializer, %6
   %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
-  %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx = add <8 x i32> %9, %rdx.shuf
-  %rdx.shuf1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx2 = add <8 x i32> %bin.rdx, %rdx.shuf1
-  %rdx.shuf3 = shufflevector <8 x i32> %bin.rdx2, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx4 = add <8 x i32> %bin.rdx2, %rdx.shuf3
-  %10 = extractelement <8 x i32> %bin.rdx4, i32 0
-  ret i32 %10
+  %r = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %9)
+  ret i32 %r
 }
 
+declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>)
+
 define i32 @oversized_ADDV_512(<16 x i32>* %arr)  {
 ; CHECK-LABEL: oversized_ADDV_512
 ; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
   %bin.rdx = load <16 x i32>, <16 x i32>* %arr
-
-  %rdx.shuf0 = shufflevector <16 x i32> %bin.rdx, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx0 = add <16 x i32> %bin.rdx, %rdx.shuf0
-
-  %rdx.shuf = shufflevector <16 x i32> %bin.rdx0, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef >
-  %bin.rdx11 = add <16 x i32> %bin.rdx0, %rdx.shuf
-
-  %rdx.shuf12 = shufflevector <16 x i32> %bin.rdx11, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
-  %bin.rdx13 = add <16 x i32> %bin.rdx11, %rdx.shuf12
-
-  %rdx.shuf13 = shufflevector <16 x i32> %bin.rdx13, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
-  %bin.rdx14 = add <16 x i32> %bin.rdx13, %rdx.shuf13
-
-  %r = extractelement <16 x i32> %bin.rdx14, i32 0
+  %r = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> %bin.rdx)
   ret i32 %r
 }
diff --git a/test/CodeGen/AArch64/aarch64-minmaxv.ll b/test/CodeGen/AArch64/aarch64-minmaxv.ll
index 9a56cd6ae7c0..760a8f8419f9 100644
--- a/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -2,344 +2,148 @@
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 
+declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
+declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)
+
+declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)
+declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)
+
+declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>)
+declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)
+
 ; CHECK-LABEL: smax_B
 ; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
 define i8 @smax_B(<16 x i8>* nocapture readonly %arr)  {
   %arr.load = load <16 x i8>, <16 x i8>* %arr
-  %rdx.shuf = shufflevector <16 x i8> %arr.load, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp22 = icmp sgt <16 x i8> %arr.load, %rdx.shuf
-  %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %arr.load, <16 x i8> %rdx.shuf
-  %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp25 = icmp sgt <16 x i8> %rdx.minmax.select23, %rdx.shuf24
-  %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24
-  %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp28 = icmp sgt <16 x i8> %rdx.minmax.select26, %rdx.shuf27
-  %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27
-  %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp31 = icmp sgt <16 x i8> %rdx.minmax.select29, %rdx.shuf30
-  %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
-  %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0
-  %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1
-  %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt
+  %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %arr.load)
   ret i8 %r
 }
 
 ; CHECK-LABEL: smax_H
 ; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
 define i16 @smax_H(<8 x i16>* nocapture readonly %arr) {
-  %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr
-  %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp23 = icmp sgt <8 x i16> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf
-  %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp26 = icmp sgt <8 x i16> %rdx.minmax.select24, %rdx.shuf25
-  %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25
-  %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp29 = icmp sgt <8 x i16> %rdx.minmax.select27, %rdx.shuf28
-  %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0
-  %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0
-  %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1
-  %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt
+  %arr.load = load <8 x i16>, <8 x i16>* %arr
+  %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %arr.load)
   ret i16 %r
 }
 
 ; CHECK-LABEL: smax_S
 ; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
 define i32 @smax_S(<4 x i32> * nocapture readonly %arr)  {
-  %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr
-  %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-  %rdx.minmax.cmp18 = icmp sgt <4 x i32> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf
-  %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp21 = icmp sgt <4 x i32> %rdx.minmax.select19, %rdx.shuf20
-  %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0
-  %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0
-  %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1
-  %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt
+  %arr.load = load <4 x i32>, <4 x i32>* %arr
+  %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %arr.load)
   ret i32 %r
 }
 
-; CHECK-LABEL: smax_D
-; CHECK-NOT: smaxv
-define i64 @smax_D(<2 x i64>* nocapture readonly %arr) {
-  %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr
-  %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
-  %rdx.minmax.cmp18 = icmp sgt <2 x i64> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0
-  %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0
-  %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1
-  %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
-  ret i64 %r
-}
-
-
 ; CHECK-LABEL: umax_B
 ; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
 define i8 @umax_B(<16 x i8>* nocapture readonly %arr)  {
-  %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr
-  %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp22 = icmp ugt <16 x i8> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf
-  %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp25 = icmp ugt <16 x i8> %rdx.minmax.select23, %rdx.shuf24
-  %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24
-  %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp28 = icmp ugt <16 x i8> %rdx.minmax.select26, %rdx.shuf27
-  %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27
-  %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp31 = icmp ugt <16 x i8> %rdx.minmax.select29, %rdx.shuf30
-  %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
-  %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0
-  %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1
-  %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt
+  %arr.load = load <16 x i8>, <16 x i8>* %arr
+  %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %arr.load)
   ret i8 %r
 }
 
 ; CHECK-LABEL: umax_H
 ; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
 define i16 @umax_H(<8 x i16>* nocapture readonly %arr)  {
-  %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr
-  %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp23 = icmp ugt <8 x i16> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf
-  %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp26 = icmp ugt <8 x i16> %rdx.minmax.select24, %rdx.shuf25
-  %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25
-  %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp29 = icmp ugt <8 x i16> %rdx.minmax.select27, %rdx.shuf28
-  %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0
-  %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0
-  %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1
-  %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt
+  %arr.load = load <8 x i16>, <8 x i16>* %arr
+  %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %arr.load)
   ret i16 %r
 }
 
 ; CHECK-LABEL: umax_S
 ; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
 define i32 @umax_S(<4 x i32>* nocapture readonly %arr) {
-  %rdx.minmax.select  = load <4 x i32>, <4 x i32>* %arr
-  %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-  %rdx.minmax.cmp18 = icmp ugt <4 x i32> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf
-  %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp21 = icmp ugt <4 x i32> %rdx.minmax.select19, %rdx.shuf20
-  %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0
-  %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0
-  %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1
-  %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt
+  %arr.load = load <4 x i32>, <4 x i32>* %arr
+  %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %arr.load)
   ret i32 %r
 }
 
-; CHECK-LABEL: umax_D
-; CHECK-NOT: umaxv
-define i64 @umax_D(<2 x i64>* nocapture readonly %arr)  {
-  %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr
-  %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
-  %rdx.minmax.cmp18 = icmp ugt <2 x i64> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0
-  %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0
-  %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1
-  %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
-  ret i64 %r
-}
-
-
 ; CHECK-LABEL: smin_B
 ; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b
 define i8 @smin_B(<16 x i8>* nocapture readonly %arr) {
-  %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr
-  %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp22 = icmp slt <16 x i8> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf
-  %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp25 = icmp slt <16 x i8> %rdx.minmax.select23, %rdx.shuf24
-  %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24
-  %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp28 = icmp slt <16 x i8> %rdx.minmax.select26, %rdx.shuf27
-  %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27
-  %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp31 = icmp slt <16 x i8> %rdx.minmax.select29, %rdx.shuf30
-  %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
-  %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0
-  %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1
-  %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt
+  %arr.load = load <16 x i8>, <16 x i8>* %arr
+  %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %arr.load)
   ret i8 %r
 }
 
 ; CHECK-LABEL: smin_H
 ; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h
 define i16 @smin_H(<8 x i16>* nocapture readonly %arr) {
-  %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr
-  %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp23 = icmp slt <8 x i16> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf
-  %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp26 = icmp slt <8 x i16> %rdx.minmax.select24, %rdx.shuf25
-  %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25
-  %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp29 = icmp slt <8 x i16> %rdx.minmax.select27, %rdx.shuf28
-  %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0
-  %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0
-  %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1
-  %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt
+  %arr.load = load <8 x i16>, <8 x i16>* %arr
+  %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %arr.load)
   ret i16 %r
 }
 
 ; CHECK-LABEL: smin_S
 ; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s
 define i32 @smin_S(<4 x i32>* nocapture readonly %arr) {
-  %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr
-  %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-  %rdx.minmax.cmp18 = icmp slt <4 x i32> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf
-  %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp21 = icmp slt <4 x i32> %rdx.minmax.select19, %rdx.shuf20
-  %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0
-  %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0
-  %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1
-  %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt
+  %arr.load = load <4 x i32>, <4 x i32>* %arr
+  %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %arr.load)
   ret i32 %r
 }
 
-; CHECK-LABEL: smin_D
-; CHECK-NOT: sminv
-define i64 @smin_D(<2 x i64>* nocapture readonly %arr) {
-  %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr
-  %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
-  %rdx.minmax.cmp18 = icmp slt <2 x i64> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0
-  %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0
-  %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1
-  %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
-  ret i64 %r
-}
-
-
 ; CHECK-LABEL: umin_B
 ; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b
 define i8 @umin_B(<16 x i8>* nocapture readonly %arr)  {
-  %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr
-  %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp22 = icmp ult <16 x i8> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf
-  %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp25 = icmp ult <16 x i8> %rdx.minmax.select23, %rdx.shuf24
-  %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24
-  %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp28 = icmp ult <16 x i8> %rdx.minmax.select26, %rdx.shuf27
-  %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27
-  %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp31 = icmp ult <16 x i8> %rdx.minmax.select29, %rdx.shuf30
-  %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
-  %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0
-  %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1
-  %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt
+  %arr.load = load <16 x i8>, <16 x i8>* %arr
+  %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %arr.load)
   ret i8 %r
 }
 
 ; CHECK-LABEL: umin_H
 ; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h
 define i16 @umin_H(<8 x i16>* nocapture readonly %arr)  {
-  %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr
-  %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp23 = icmp ult <8 x i16> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf
-  %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp26 = icmp ult <8 x i16> %rdx.minmax.select24, %rdx.shuf25
-  %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25
-  %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp29 = icmp ult <8 x i16> %rdx.minmax.select27, %rdx.shuf28
-  %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0
-  %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0
-  %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1
-  %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt
+  %arr.load = load <8 x i16>, <8 x i16>* %arr
+  %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %arr.load)
   ret i16 %r
 }
 
 ; CHECK-LABEL: umin_S
 ; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s
 define i32 @umin_S(<4 x i32>* nocapture readonly %arr) {
-  %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr
-  %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-  %rdx.minmax.cmp18 = icmp ult <4 x i32> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf
-  %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp21 = icmp ult <4 x i32> %rdx.minmax.select19, %rdx.shuf20
-  %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0
-  %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0
-  %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1
-  %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt
+  %arr.load = load <4 x i32>, <4 x i32>* %arr
+  %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %arr.load)
   ret i32 %r
 }
 
-; CHECK-LABEL: umin_D
-; CHECK-NOT: uminv
-define i64 @umin_D(<2 x i64>* nocapture readonly %arr)  {
-  %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr
-  %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
-  %rdx.minmax.cmp18 = icmp ult <2 x i64> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0
-  %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0
-  %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1
-  %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
-  ret i64 %r
-}
-
 ; CHECK-LABEL: fmaxnm_S
 ; CHECK: fmaxnmv
 define float @fmaxnm_S(<4 x float>* nocapture readonly %arr) {
-  %rdx.minmax.select  = load <4 x float>, <4 x float>* %arr
-  %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-  %rdx.minmax.cmp = fcmp fast oge <4 x float> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf
-  %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp1 = fcmp fast oge <4 x float> %rdx.minmax.select1, %rdx.shuf1
-  %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0
-  %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0
-  %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1
-  %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt
+  %arr.load  = load <4 x float>, <4 x float>* %arr
+  %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %arr.load)
   ret float %r
 }
 
 ; CHECK-LABEL: fminnm_S
 ; CHECK: fminnmv
 define float @fminnm_S(<4 x float>* nocapture readonly %arr) {
-  %rdx.minmax.select  = load <4 x float>, <4 x float>* %arr
-  %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-  %rdx.minmax.cmp = fcmp fast ole <4 x float> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf
-  %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp1 = fcmp fast ole <4 x float> %rdx.minmax.select1, %rdx.shuf1
-  %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0
-  %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0
-  %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1
-  %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt
+  %arr.load  = load <4 x float>, <4 x float>* %arr
+  %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %arr.load)
   ret float %r
 }
 
+declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>)
+
 define i16 @oversized_umax_256(<16 x i16>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_umax_256
 ; CHECK: umax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
 ; CHECK: umaxv {{h[0-9]+}}, [[V0]]
-  %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr
-  %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp22 = icmp ugt <16 x i16> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf
-  %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp25 = icmp ugt <16 x i16> %rdx.minmax.select23, %rdx.shuf24
-  %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24
-  %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp28 = icmp ugt <16 x i16> %rdx.minmax.select26, %rdx.shuf27
-  %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27
-  %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp31 = icmp ugt <16 x i16> %rdx.minmax.select29, %rdx.shuf30
-  %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
-  %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0
-  %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1
-  %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt
+  %arr.load = load <16 x i16>, <16 x i16>* %arr
+  %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> %arr.load)
   ret i16 %r
 }
 
+declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>)
+
 define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_umax_512
 ; CHECK: umax v
@@ -347,47 +151,23 @@ define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr)  {
 ; CHECK-NEXT: umax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 ; CHECK-NEXT: umaxv {{s[0-9]+}}, [[V0]]
   %arr.load = load <16 x i32>, <16 x i32>* %arr
-  %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp22 = icmp ugt <16 x i32> %arr.load, %rdx.shuf
-  %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf
-  %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp25 = icmp ugt <16 x i32> %rdx.minmax.select23, %rdx.shuf24
-  %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24
-  %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp28 = icmp ugt <16 x i32> %rdx.minmax.select26, %rdx.shuf27
-  %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27
-  %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp31 = icmp ugt <16 x i32> %rdx.minmax.select29, %rdx.shuf30
-  %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
-  %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0
-  %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1
-  %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt
+  %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %arr.load)
   ret i32 %r
 }
 
+declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>)
+
 define i16 @oversized_umin_256(<16 x i16>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_umin_256
 ; CHECK: umin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
 ; CHECK: uminv {{h[0-9]+}}, [[V0]]
-  %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr
-  %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp22 = icmp ult <16 x i16> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf
-  %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp25 = icmp ult <16 x i16> %rdx.minmax.select23, %rdx.shuf24
-  %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24
-  %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp28 = icmp ult <16 x i16> %rdx.minmax.select26, %rdx.shuf27
-  %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27
-  %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp31 = icmp ult <16 x i16> %rdx.minmax.select29, %rdx.shuf30
-  %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
-  %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0
-  %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1
-  %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt
+  %arr.load = load <16 x i16>, <16 x i16>* %arr
+  %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> %arr.load)
   ret i16 %r
 }
 
+declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>)
+
 define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_umin_512
 ; CHECK: umin v
@@ -395,47 +175,23 @@ define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr)  {
 ; CHECK-NEXT: umin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 ; CHECK-NEXT: uminv {{s[0-9]+}}, [[V0]]
   %arr.load = load <16 x i32>, <16 x i32>* %arr
-  %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp22 = icmp ult <16 x i32> %arr.load, %rdx.shuf
-  %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf
-  %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp25 = icmp ult <16 x i32> %rdx.minmax.select23, %rdx.shuf24
-  %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24
-  %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp28 = icmp ult <16 x i32> %rdx.minmax.select26, %rdx.shuf27
-  %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27
-  %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp31 = icmp ult <16 x i32> %rdx.minmax.select29, %rdx.shuf30
-  %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
-  %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0
-  %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1
-  %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt
+  %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> %arr.load)
   ret i32 %r
 }
 
+declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>)
+
 define i16 @oversized_smax_256(<16 x i16>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_smax_256
 ; CHECK: smax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
 ; CHECK: smaxv {{h[0-9]+}}, [[V0]]
   %arr.load = load <16 x i16>, <16 x i16>* %arr
-  %rdx.shuf = shufflevector <16 x i16> %arr.load, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp22 = icmp sgt <16 x i16> %arr.load, %rdx.shuf
-  %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %arr.load, <16 x i16> %rdx.shuf
-  %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp25 = icmp sgt <16 x i16> %rdx.minmax.select23, %rdx.shuf24
-  %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24
-  %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp28 = icmp sgt <16 x i16> %rdx.minmax.select26, %rdx.shuf27
-  %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27
-  %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp31 = icmp sgt <16 x i16> %rdx.minmax.select29, %rdx.shuf30
-  %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
-  %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0
-  %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1
-  %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt
+  %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> %arr.load)
   ret i16 %r
 }
 
+declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>)
+
 define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_smax_512
 ; CHECK: smax v
@@ -443,47 +199,23 @@ define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr)  {
 ; CHECK-NEXT: smax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 ; CHECK-NEXT: smaxv {{s[0-9]+}}, [[V0]]
   %arr.load = load <16 x i32>, <16 x i32>* %arr
-  %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp22 = icmp sgt <16 x i32> %arr.load, %rdx.shuf
-  %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf
-  %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp25 = icmp sgt <16 x i32> %rdx.minmax.select23, %rdx.shuf24
-  %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24
-  %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp28 = icmp sgt <16 x i32> %rdx.minmax.select26, %rdx.shuf27
-  %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27
-  %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp31 = icmp sgt <16 x i32> %rdx.minmax.select29, %rdx.shuf30
-  %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
-  %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0
-  %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1
-  %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt
+  %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> %arr.load)
   ret i32 %r
 }
 
+declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>)
+
 define i16 @oversized_smin_256(<16 x i16>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_smin_256
 ; CHECK: smin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
 ; CHECK: sminv {{h[0-9]+}}, [[V0]]
-  %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr
-  %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp22 = icmp slt <16 x i16> %rdx.minmax.select, %rdx.shuf
-  %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf
-  %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp25 = icmp slt <16 x i16> %rdx.minmax.select23, %rdx.shuf24
-  %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24
-  %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp28 = icmp slt <16 x i16> %rdx.minmax.select26, %rdx.shuf27
-  %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27
-  %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp31 = icmp slt <16 x i16> %rdx.minmax.select29, %rdx.shuf30
-  %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
-  %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0
-  %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1
-  %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt
+  %arr.load = load <16 x i16>, <16 x i16>* %arr
+  %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> %arr.load)
   ret i16 %r
 }
 
+declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>)
+
 define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_smin_512
 ; CHECK: smin v
@@ -491,20 +223,6 @@ define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr)  {
 ; CHECK-NEXT: smin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 ; CHECK-NEXT: sminv {{s[0-9]+}}, [[V0]]
   %arr.load = load <16 x i32>, <16 x i32>* %arr
-  %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp22 = icmp slt <16 x i32> %arr.load, %rdx.shuf
-  %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf
-  %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp25 = icmp slt <16 x i32> %rdx.minmax.select23, %rdx.shuf24
-  %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24
-  %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp28 = icmp slt <16 x i32> %rdx.minmax.select26, %rdx.shuf27
-  %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27
-  %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.minmax.cmp31 = icmp slt <16 x i32> %rdx.minmax.select29, %rdx.shuf30
-  %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
-  %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0
-  %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1
-  %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt
+  %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> %arr.load)
   ret i32 %r
 }
diff --git a/test/CodeGen/AArch64/arm64-vabs.ll b/test/CodeGen/AArch64/arm64-vabs.ll
index c7b0c33550d0..ff7a0a8300e2 100644
--- a/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/test/CodeGen/AArch64/arm64-vabs.ll
@@ -134,8 +134,10 @@ define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
   ret <2 x i64> %tmp4
 }
 
-define i16 @uabdl8h_log2_shuffle(<16 x i8>* %a, <16 x i8>* %b) {
-; CHECK-LABEL: uabdl8h_log2_shuffle
+declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>)
+
+define i16 @uabdl8h_rdx(<16 x i8>* %a, <16 x i8>* %b) {
+; CHECK-LABEL: uabdl8h_rdx
 ; CHECK: uabdl2.8h
 ; CHECK: uabdl.8h
   %aload = load <16 x i8>, <16 x i8>* %a, align 1
@@ -146,20 +148,14 @@ define i16 @uabdl8h_log2_shuffle(<16 x i8>* %a, <16 x i8>* %b) {
   %abcmp = icmp slt <16 x i16> %abdiff, zeroinitializer
   %ababs = sub nsw <16 x i16> zeroinitializer, %abdiff
   %absel = select <16 x i1> %abcmp, <16 x i16> %ababs, <16 x i16> %abdiff
-  %rdx.shuf = shufflevector <16 x i16> %absel, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin1.rdx = add <16 x i16> %absel, %rdx.shuf
-  %rdx.shufx = shufflevector <16 x i16> %bin1.rdx, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx = add <16 x i16> %bin1.rdx, %rdx.shufx
-  %rdx.shuf136 = shufflevector <16 x i16> %bin.rdx, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx137 = add <16 x i16> %bin.rdx, %rdx.shuf136
-  %rdx.shuf138 = shufflevector <16 x i16> %bin.rdx137, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx139 = add <16 x i16> %bin.rdx137, %rdx.shuf138
-  %reduced_v = extractelement <16 x i16> %bin.rdx139, i16 0
+  %reduced_v = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> %absel)
   ret i16 %reduced_v
 }
 
-define i32 @uabdl4s_log2_shuffle(<8 x i16>* %a, <8 x i16>* %b) {
-; CHECK-LABEL: uabdl4s_log2_shuffle
+declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>)
+
+define i32 @uabdl4s_rdx(<8 x i16>* %a, <8 x i16>* %b) {
+; CHECK-LABEL: uabdl4s_rdx
 ; CHECK: uabdl2.4s
 ; CHECK: uabdl.4s
   %aload = load <8 x i16>, <8 x i16>* %a, align 1
@@ -170,18 +166,14 @@ define i32 @uabdl4s_log2_shuffle(<8 x i16>* %a, <8 x i16>* %b) {
   %abcmp = icmp slt <8 x i32> %abdiff, zeroinitializer
   %ababs = sub nsw <8 x i32> zeroinitializer, %abdiff
   %absel = select <8 x i1> %abcmp, <8 x i32> %ababs, <8 x i32> %abdiff
-  %rdx.shuf = shufflevector <8 x i32> %absel, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx = add <8 x i32> %absel, %rdx.shuf
-  %rdx.shuf136 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx137 = add <8 x i32> %bin.rdx, %rdx.shuf136
-  %rdx.shuf138 = shufflevector <8 x i32> %bin.rdx137, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx139 = add <8 x i32> %bin.rdx137, %rdx.shuf138
-  %reduced_v = extractelement <8 x i32> %bin.rdx139, i32 0
+  %reduced_v = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %absel)
   ret i32 %reduced_v
 }
 
-define i64 @uabdl2d_log2_shuffle(<4 x i32>* %a, <4 x i32>* %b, i32 %h) {
-; CHECK: uabdl2d_log2_shuffle
+declare i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>)
+
+define i64 @uabdl2d_rdx(<4 x i32>* %a, <4 x i32>* %b, i32 %h) {
+; CHECK-LABEL: uabdl2d_rdx
 ; CHECK: uabdl2.2d
 ; CHECK: uabdl.2d
   %aload = load <4 x i32>, <4 x i32>* %a, align 1
@@ -192,11 +184,7 @@ define i64 @uabdl2d_log2_shuffle(<4 x i32>* %a, <4 x i32>* %b, i32 %h) {
   %abcmp = icmp slt <4 x i64> %abdiff, zeroinitializer
   %ababs = sub nsw <4 x i64> zeroinitializer, %abdiff
   %absel = select <4 x i1> %abcmp, <4 x i64> %ababs, <4 x i64> %abdiff
-  %rdx.shuf136 = shufflevector <4 x i64> %absel, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-  %bin.rdx137 = add <4 x i64> %absel, %rdx.shuf136
-  %rdx.shuf138 = shufflevector <4 x i64> %bin.rdx137, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-  %bin.rdx139 = add <4 x i64> %bin.rdx137, %rdx.shuf138
-  %reduced_v = extractelement <4 x i64> %bin.rdx139, i16 0
+  %reduced_v = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> %absel)
   ret i64 %reduced_v
 }
 
diff --git a/test/CodeGen/AArch64/ldst-zero.ll b/test/CodeGen/AArch64/ldst-zero.ll
index 95b92ac70879..7d443a631f91 100644
--- a/test/CodeGen/AArch64/ldst-zero.ll
+++ b/test/CodeGen/AArch64/ldst-zero.ll
@@ -9,9 +9,9 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
 ; Original test case which exhibited the bug
 define void @test1(%struct.tree_common* %t, i32 %code, i8* %type) {
 ; CHECK-LABEL: test1:
-; CHECK: stp xzr, xzr, [x0, #8]
-; CHECK: stp xzr, x2, [x0]
-; CHECK: str w1, [x0, #16]
+; CHECK-DAG: stp x2, xzr, [x0, #8]
+; CHECK-DAG: str w1, [x0, #16]
+; CHECK-DAG: str xzr, [x0]
 entry:
   %0 = bitcast %struct.tree_common* %t to i8*
   tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false)
@@ -25,10 +25,8 @@ entry:
 ; Store to each struct element instead of using memset
 define void @test2(%struct.tree_common* %t, i32 %code, i8* %type) {
 ; CHECK-LABEL: test2:
-; CHECK: stp xzr, xzr, [x0]
-; CHECK: str wzr, [x0, #16]
-; CHECK: str w1, [x0, #16]
-; CHECK: str x2, [x0, #8]
+; CHECK-DAG: str w1, [x0, #16]
+; CHECK-DAG: stp xzr, x2, [x0]
 entry:
   %0 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 0
   %1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
@@ -44,9 +42,9 @@ entry:
 ; Vector store instead of memset
 define void @test3(%struct.tree_common* %t, i32 %code, i8* %type) {
 ; CHECK-LABEL: test3:
-; CHECK: stp xzr, xzr, [x0, #8]
-; CHECK: stp xzr, x2, [x0]
-; CHECK: str w1, [x0, #16]
+; CHECK-DAG: stp x2, xzr, [x0, #8]
+; CHECK-DAG: str w1, [x0, #16]
+; CHECK-DAG: str xzr, [x0]
 entry:
   %0 = bitcast %struct.tree_common* %t to <3 x i64>*
   store <3 x i64> zeroinitializer, <3 x i64>* %0, align 8
@@ -60,9 +58,8 @@ entry:
 ; Vector store, then store to vector elements
 define void @test4(<3 x i64>* %p, i64 %x, i64 %y) {
 ; CHECK-LABEL: test4:
-; CHECK: stp xzr, xzr, [x0, #8]
-; CHECK: stp xzr, x2, [x0]
-; CHECK: str x1, [x0, #16]
+; CHECK-DAG: stp x2, x1, [x0, #8]
+; CHECK-DAG: str xzr, [x0]
 entry:
   store <3 x i64> zeroinitializer, <3 x i64>* %p, align 8
   %0 = bitcast <3 x i64>* %p to i64*
diff --git a/test/CodeGen/AArch64/misched-stp.ll b/test/CodeGen/AArch64/misched-stp.ll
index 4ea481cae68e..1c9ea68834c2 100644
--- a/test/CodeGen/AArch64/misched-stp.ll
+++ b/test/CodeGen/AArch64/misched-stp.ll
@@ -1,20 +1,18 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -o - | FileCheck %s
 
 ; Tests to check that the scheduler dependencies derived from alias analysis are
 ; correct when we have loads that have been split up so that they can later be
 ; merged into STP.
 
-; CHECK: ********** MI Scheduling **********
-; CHECK: test_splat:BB#0 entry
-; CHECK: SU({{[0-9]+}}):   STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%3+8]
-; CHECK: Successors:
-; CHECK-NEXT: ord  [[SU1:SU\([0-9]+\)]]
-; CHECK: SU({{[0-9]+}}):   STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%3+4]
-; CHECK: Successors:
-; CHECK-NEXT: ord  [[SU2:SU\([0-9]+\)]]
-; CHECK: [[SU1]]:   STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%2]
-; CHECK: [[SU2]]:   STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%1]
+; Now that overwritten stores are elided in SelectionDAG, dependencies
+; are resolved and removed before MISCHED. Check that we have
+; an equivalent pair of stp instructions as a baseline.
+
+; CHECK-LABEL: test_splat
+; CHECK:     ldr [[REG:w[0-9]+]], [x2]
+; CHECK-DAG: stp w0, [[REG]], [x2, #12]
+; CHECK-DAG: stp [[REG]], w1, [x2, #4]
 define void @test_splat(i32 %x, i32 %y, i32* %p) {
 entry:
   %val = load i32, i32* %p, align 4
@@ -35,16 +33,11 @@ entry:
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
 %struct.tree_common = type { i8*, i8*, i32 }
 
-; CHECK: ********** MI Scheduling **********
-; CHECK: test_zero:BB#0 entry
-; CHECK: SU({{[0-9]+}}):   STRXui %XZR, %vreg{{[0-9]+}}, 2; mem:ST8[%0+16]
-; CHECK: Successors:
-; CHECK-NEXT: ord  [[SU3:SU\([0-9]+\)]]
-; CHECK: SU({{[0-9]+}}):   STRXui %XZR, %vreg{{[0-9]+}}, 1; mem:ST8[%0+8]
-; CHECK: Successors:
-; CHECK-NEXT: ord  [[SU4:SU\([0-9]+\)]]
-; CHECK: [[SU3]]:   STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 4; mem:ST4[%code1]
-; CHECK: [[SU4]]:   STRXui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 1; mem:ST8[%type2]
+; CHECK-LABEL: test_zero
+; CHECK-DAG: stp x2, xzr, [x0, #8]
+; CHECK-DAG: str w1, [x0, #16]
+; CHECK-DAG: str xzr, [x0]
+
 define void @test_zero(%struct.tree_common* %t, i32 %code, i8* %type) {
 entry:
   %0 = bitcast %struct.tree_common* %t to i8*
diff --git a/test/CodeGen/AMDGPU/fmax3.ll b/test/CodeGen/AMDGPU/fmax3.ll
index a96eb5db9e2a..2e6d3f3c1e8f 100644
--- a/test/CodeGen/AMDGPU/fmax3.ll
+++ b/test/CodeGen/AMDGPU/fmax3.ll
@@ -1,39 +1,92 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 
-declare float @llvm.maxnum.f32(float, float) nounwind readnone
-
-; SI-LABEL: {{^}}test_fmax3_olt_0:
-; SI: buffer_load_dword [[REGC:v[0-9]+]]
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define amdgpu_kernel void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+; GCN-LABEL: {{^}}test_fmax3_olt_0_f32:
+; GCN: buffer_load_dword [[REGC:v[0-9]+]]
+; GCN: buffer_load_dword [[REGB:v[0-9]+]]
+; GCN: buffer_load_dword [[REGA:v[0-9]+]]
+; GCN: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
+define amdgpu_kernel void @test_fmax3_olt_0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
   %a = load volatile  float, float addrspace(1)* %aptr, align 4
   %b = load volatile float, float addrspace(1)* %bptr, align 4
   %c = load volatile float, float addrspace(1)* %cptr, align 4
-  %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
-  %f1 = call float @llvm.maxnum.f32(float %f0, float %c) nounwind readnone
+  %f0 = call float @llvm.maxnum.f32(float %a, float %b)
+  %f1 = call float @llvm.maxnum.f32(float %f0, float %c)
   store float %f1, float addrspace(1)* %out, align 4
   ret void
 }
 
 ; Commute operand of second fmax
-; SI-LABEL: {{^}}test_fmax3_olt_1:
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: buffer_load_dword [[REGC:v[0-9]+]]
-; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define amdgpu_kernel void @test_fmax3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+; GCN-LABEL: {{^}}test_fmax3_olt_1_f32:
+; GCN: buffer_load_dword [[REGB:v[0-9]+]]
+; GCN: buffer_load_dword [[REGA:v[0-9]+]]
+; GCN: buffer_load_dword [[REGC:v[0-9]+]]
+; GCN: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
+define amdgpu_kernel void @test_fmax3_olt_1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
   %a = load volatile float, float addrspace(1)* %aptr, align 4
   %b = load volatile float, float addrspace(1)* %bptr, align 4
   %c = load volatile float, float addrspace(1)* %cptr, align 4
-  %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
-  %f1 = call float @llvm.maxnum.f32(float %c, float %f0) nounwind readnone
+  %f0 = call float @llvm.maxnum.f32(float %a, float %b)
+  %f1 = call float @llvm.maxnum.f32(float %c, float %f0)
   store float %f1, float addrspace(1)* %out, align 4
   ret void
 }
+
+; GCN-LABEL: {{^}}test_fmax3_olt_0_f16:
+; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGA:v[0-9]+]]
+
+; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]],
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
+
+; VI: v_max_f16_e32
+; VI: v_max_f16_e32 [[RESULT:v[0-9]+]],
+
+; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_short [[RESULT]],
+define amdgpu_kernel void @test_fmax3_olt_0_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
+  %a = load volatile  half, half addrspace(1)* %aptr, align 2
+  %b = load volatile half, half addrspace(1)* %bptr, align 2
+  %c = load volatile half, half addrspace(1)* %cptr, align 2
+  %f0 = call half @llvm.maxnum.f16(half %a, half %b)
+  %f1 = call half @llvm.maxnum.f16(half %f0, half %c)
+  store half %f1, half addrspace(1)* %out, align 2
+  ret void
+}
+
+; Commute operand of second fmax
+; GCN-LABEL: {{^}}test_fmax3_olt_1_f16:
+; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGA:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
+
+; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]],
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
+
+; VI: v_max_f16_e32
+; VI: v_max_f16_e32 [[RESULT:v[0-9]+]],
+
+; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_short [[RESULT]],
+define amdgpu_kernel void @test_fmax3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
+  %a = load volatile half, half addrspace(1)* %aptr, align 2
+  %b = load volatile half, half addrspace(1)* %bptr, align 2
+  %c = load volatile half, half addrspace(1)* %cptr, align 2
+  %f0 = call half @llvm.maxnum.f16(half %a, half %b)
+  %f1 = call half @llvm.maxnum.f16(half %c, half %f0)
+  store half %f1, half addrspace(1)* %out, align 2
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare float @llvm.maxnum.f32(float, float) #1
+declare half @llvm.maxnum.f16(half, half) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
diff --git a/test/CodeGen/AMDGPU/fmin3.ll b/test/CodeGen/AMDGPU/fmin3.ll
index 3183f77f090b..5fc5895c3ecb 100644
--- a/test/CodeGen/AMDGPU/fmin3.ll
+++ b/test/CodeGen/AMDGPU/fmin3.ll
@@ -1,40 +1,90 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 
-declare float @llvm.minnum.f32(float, float) nounwind readnone
-
-; SI-LABEL: {{^}}test_fmin3_olt_0:
-; SI: buffer_load_dword [[REGC:v[0-9]+]]
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define amdgpu_kernel void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+; GCN-LABEL: {{^}}test_fmin3_olt_0_f32:
+; GCN: buffer_load_dword [[REGC:v[0-9]+]]
+; GCN: buffer_load_dword [[REGB:v[0-9]+]]
+; GCN: buffer_load_dword [[REGA:v[0-9]+]]
+; GCN: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_dword [[RESULT]],
+define amdgpu_kernel void @test_fmin3_olt_0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
   %a = load volatile float, float addrspace(1)* %aptr, align 4
   %b = load volatile float, float addrspace(1)* %bptr, align 4
   %c = load volatile float, float addrspace(1)* %cptr, align 4
-  %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
-  %f1 = call float @llvm.minnum.f32(float %f0, float %c) nounwind readnone
+  %f0 = call float @llvm.minnum.f32(float %a, float %b)
+  %f1 = call float @llvm.minnum.f32(float %f0, float %c)
   store float %f1, float addrspace(1)* %out, align 4
   ret void
 }
 
 ; Commute operand of second fmin
-; SI-LABEL: {{^}}test_fmin3_olt_1:
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: buffer_load_dword [[REGC:v[0-9]+]]
-; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define amdgpu_kernel void @test_fmin3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+; GCN-LABEL: {{^}}test_fmin3_olt_1_f32:
+; GCN: buffer_load_dword [[REGB:v[0-9]+]]
+; GCN: buffer_load_dword [[REGA:v[0-9]+]]
+; GCN: buffer_load_dword [[REGC:v[0-9]+]]
+; GCN: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_dword [[RESULT]],
+define amdgpu_kernel void @test_fmin3_olt_1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
   %a = load volatile float, float addrspace(1)* %aptr, align 4
   %b = load volatile float, float addrspace(1)* %bptr, align 4
   %c = load volatile float, float addrspace(1)* %cptr, align 4
-  %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
-  %f1 = call float @llvm.minnum.f32(float %c, float %f0) nounwind readnone
+  %f0 = call float @llvm.minnum.f32(float %a, float %b)
+  %f1 = call float @llvm.minnum.f32(float %c, float %f0)
   store float %f1, float addrspace(1)* %out, align 4
   ret void
 }
+
+; GCN-LABEL: {{^}}test_fmin3_olt_0_f16:
+; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGA:v[0-9]+]]
+
+; SI: v_min3_f32 [[RESULT_F32:v[0-9]+]],
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
+
+; VI: v_min_f16_e32
+; VI: v_min_f16_e32 [[RESULT:v[0-9]+]],
+
+; GFX9: v_min3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_short [[RESULT]],
+define amdgpu_kernel void @test_fmin3_olt_0_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
+  %a = load volatile half, half addrspace(1)* %aptr, align 2
+  %b = load volatile half, half addrspace(1)* %bptr, align 2
+  %c = load volatile half, half addrspace(1)* %cptr, align 2
+  %f0 = call half @llvm.minnum.f16(half %a, half %b)
+  %f1 = call half @llvm.minnum.f16(half %f0, half %c)
+  store half %f1, half addrspace(1)* %out, align 2
+  ret void
+}
+
+; Commute operand of second fmin
+; GCN-LABEL: {{^}}test_fmin3_olt_1_f16:
+; GCN: buffer_load_ushort [[REGB:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGA:v[0-9]+]]
+; GCN: buffer_load_ushort [[REGC:v[0-9]+]]
+
+; SI: v_min3_f32 [[RESULT_F32:v[0-9]+]],
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT_F32]]
+
+; VI: v_min_f16_e32
+; VI: v_min_f16_e32 [[RESULT:v[0-9]+]],
+
+; GFX9: v_min3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; GCN: buffer_store_short [[RESULT]],
+define amdgpu_kernel void @test_fmin3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
+  %a = load volatile half, half addrspace(1)* %aptr, align 2
+  %b = load volatile half, half addrspace(1)* %bptr, align 2
+  %c = load volatile half, half addrspace(1)* %cptr, align 2
+  %f0 = call half @llvm.minnum.f16(half %a, half %b)
+  %f1 = call half @llvm.minnum.f16(half %c, half %f0)
+  store half %f1, half addrspace(1)* %out, align 2
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare float @llvm.minnum.f32(float, float) #1
+declare half @llvm.minnum.f16(half, half) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
diff --git a/test/CodeGen/AMDGPU/global-constant.ll b/test/CodeGen/AMDGPU/global-constant.ll
index 80acfcca7082..1898c8fb63ea 100644
--- a/test/CodeGen/AMDGPU/global-constant.ll
+++ b/test/CodeGen/AMDGPU/global-constant.ll
@@ -29,10 +29,10 @@
 define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) {
   %ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @private1, i32 0, i32 %index
   %val = load float, float addrspace(2)* %ptr
-  store float %val, float addrspace(1)* %out
+  store volatile float %val, float addrspace(1)* %out
   %ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @private2, i32 0, i32 %index
   %val2 = load float, float addrspace(2)* %ptr2
-  store float %val2, float addrspace(1)* %out
+  store volatile float %val2, float addrspace(1)* %out
   ret void
 }
 
diff --git a/test/CodeGen/AMDGPU/immv216.ll b/test/CodeGen/AMDGPU/immv216.ll
index c15a30e3c540..96132d841997 100644
--- a/test/CodeGen/AMDGPU/immv216.ll
+++ b/test/CodeGen/AMDGPU/immv216.ll
@@ -288,9 +288,9 @@ define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace
 }
 
 ; GCN-LABEL: {{^}}commute_add_literal_v2f16:
-; GFX9: buffer_load_dword [[VAL:v[0-9]+]]
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x64006400
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[K]], [[VAL]]
+; GFX9-DAG: buffer_load_dword [[VAL:v[0-9]+]]
+; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x6400{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[K]], [[VAL]] op_sel_hi:[0,1]{{$}}
 ; GFX9: buffer_store_dword [[REG]]
 
 ; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x6400{{$}}
diff --git a/test/CodeGen/AMDGPU/max3.ll b/test/CodeGen/AMDGPU/max3.ll
index 4bb4fd46becd..46dcf8e340f4 100644
--- a/test/CodeGen/AMDGPU/max3.ll
+++ b/test/CodeGen/AMDGPU/max3.ll
@@ -1,41 +1,94 @@
-; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
-; FUNC-LABEL: @v_test_imax3_sgt_i32
-; SI: v_max3_i32
-define amdgpu_kernel void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
-  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+; GCN-LABEL: {{^}}v_test_imax3_sgt_i32:
+; GCN: v_max3_i32
+define amdgpu_kernel void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32, i32 addrspace(1)* %gep0, align 4
-  %b = load i32, i32 addrspace(1)* %gep1, align 4
-  %c = load i32, i32 addrspace(1)* %gep2, align 4
+  %a = load i32, i32 addrspace(1)* %gep0
+  %b = load i32, i32 addrspace(1)* %gep1
+  %c = load i32, i32 addrspace(1)* %gep2
   %icmp0 = icmp sgt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
   %icmp1 = icmp sgt i32 %i0, %c
   %i1 = select i1 %icmp1, i32 %i0, i32 %c
-  store i32 %i1, i32 addrspace(1)* %out, align 4
+  store i32 %i1, i32 addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @v_test_umax3_ugt_i32
-; SI: v_max3_u32
-define amdgpu_kernel void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
-  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+; GCN-LABEL: {{^}}v_test_umax3_ugt_i32:
+; GCN: v_max3_u32
+define amdgpu_kernel void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32, i32 addrspace(1)* %gep0, align 4
-  %b = load i32, i32 addrspace(1)* %gep1, align 4
-  %c = load i32, i32 addrspace(1)* %gep2, align 4
+  %a = load i32, i32 addrspace(1)* %gep0
+  %b = load i32, i32 addrspace(1)* %gep1
+  %c = load i32, i32 addrspace(1)* %gep2
   %icmp0 = icmp ugt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
   %icmp1 = icmp ugt i32 %i0, %c
   %i1 = select i1 %icmp1, i32 %i0, i32 %c
-  store i32 %i1, i32 addrspace(1)* %out, align 4
+  store i32 %i1, i32 addrspace(1)* %out
   ret void
 }
+
+; GCN-LABEL: {{^}}v_test_imax3_sgt_i16:
+; SI: v_max3_i32
+
+; VI: v_max_i16
+; VI: v_max_i16
+
+; GFX9: v_max3_i16
+define amdgpu_kernel void @v_test_imax3_sgt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 addrspace(1)* %cptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
+  %gep2 = getelementptr i16, i16 addrspace(1)* %cptr, i32 %tid
+  %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
+  %a = load i16, i16 addrspace(1)* %gep0
+  %b = load i16, i16 addrspace(1)* %gep1
+  %c = load i16, i16 addrspace(1)* %gep2
+  %icmp0 = icmp sgt i16 %a, %b
+  %i0 = select i1 %icmp0, i16 %a, i16 %b
+  %icmp1 = icmp sgt i16 %i0, %c
+  %i1 = select i1 %icmp1, i16 %i0, i16 %c
+  store i16 %i1, i16 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_test_umax3_ugt_i16:
+; SI: v_max3_u32
+
+; VI: v_max_u16
+; VI: v_max_u16
+
+; GFX9: v_max3_u16
+define amdgpu_kernel void @v_test_umax3_ugt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 addrspace(1)* %cptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
+  %gep2 = getelementptr i16, i16 addrspace(1)* %cptr, i32 %tid
+  %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
+  %a = load i16, i16 addrspace(1)* %gep0
+  %b = load i16, i16 addrspace(1)* %gep1
+  %c = load i16, i16 addrspace(1)* %gep2
+  %icmp0 = icmp ugt i16 %a, %b
+  %i0 = select i1 %icmp0, i16 %a, i16 %b
+  %icmp1 = icmp ugt i16 %i0, %c
+  %i1 = select i1 %icmp1, i16 %i0, i16 %c
+  store i16 %i1, i16 addrspace(1)* %out
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
diff --git a/test/CodeGen/AMDGPU/min3.ll b/test/CodeGen/AMDGPU/min3.ll
index 59d5d2cdb1aa..e20fb81f2ecf 100644
--- a/test/CodeGen/AMDGPU/min3.ll
+++ b/test/CodeGen/AMDGPU/min3.ll
@@ -1,50 +1,50 @@
-; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
-; FUNC-LABEL: @v_test_imin3_slt_i32
-; SI: v_min3_i32
-define amdgpu_kernel void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
-  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+; GCN-LABEL: {{^}}v_test_imin3_slt_i32:
+; GCN: v_min3_i32
+define amdgpu_kernel void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32, i32 addrspace(1)* %gep0, align 4
-  %b = load i32, i32 addrspace(1)* %gep1, align 4
-  %c = load i32, i32 addrspace(1)* %gep2, align 4
+  %a = load i32, i32 addrspace(1)* %gep0
+  %b = load i32, i32 addrspace(1)* %gep1
+  %c = load i32, i32 addrspace(1)* %gep2
   %icmp0 = icmp slt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
   %icmp1 = icmp slt i32 %i0, %c
   %i1 = select i1 %icmp1, i32 %i0, i32 %c
-  store i32 %i1, i32 addrspace(1)* %outgep, align 4
+  store i32 %i1, i32 addrspace(1)* %outgep
   ret void
 }
 
-; FUNC-LABEL: @v_test_umin3_ult_i32
-; SI: v_min3_u32
-define amdgpu_kernel void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
-  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+; GCN-LABEL: {{^}}v_test_umin3_ult_i32:
+; GCN: v_min3_u32
+define amdgpu_kernel void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32, i32 addrspace(1)* %gep0, align 4
-  %b = load i32, i32 addrspace(1)* %gep1, align 4
-  %c = load i32, i32 addrspace(1)* %gep2, align 4
+  %a = load i32, i32 addrspace(1)* %gep0
+  %b = load i32, i32 addrspace(1)* %gep1
+  %c = load i32, i32 addrspace(1)* %gep2
   %icmp0 = icmp ult i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
   %icmp1 = icmp ult i32 %i0, %c
   %i1 = select i1 %icmp1, i32 %i0, i32 %c
-  store i32 %i1, i32 addrspace(1)* %outgep, align 4
+  store i32 %i1, i32 addrspace(1)* %outgep
   ret void
 }
 
-; FUNC-LABEL: @v_test_umin_umin_umin
-; SI: v_min_i32
-; SI: v_min3_i32
-define amdgpu_kernel void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
-  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+; GCN-LABEL: {{^}}v_test_umin_umin_umin:
+; GCN: v_min_i32
+; GCN: v_min3_i32
+define amdgpu_kernel void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid2 = mul i32 %tid, 2
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
@@ -57,10 +57,10 @@ define amdgpu_kernel void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 add
   %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2
 
-  %a = load i32, i32 addrspace(1)* %gep0, align 4
-  %b = load i32, i32 addrspace(1)* %gep1, align 4
-  %c = load i32, i32 addrspace(1)* %gep2, align 4
-  %d = load i32, i32 addrspace(1)* %gep3, align 4
+  %a = load i32, i32 addrspace(1)* %gep0
+  %b = load i32, i32 addrspace(1)* %gep1
+  %c = load i32, i32 addrspace(1)* %gep2
+  %d = load i32, i32 addrspace(1)* %gep3
 
   %icmp0 = icmp slt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
@@ -71,14 +71,14 @@ define amdgpu_kernel void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 add
   %icmp2 = icmp slt i32 %i0, %i1
   %i2 = select i1 %icmp2, i32 %i0, i32 %i1
 
-  store i32 %i2, i32 addrspace(1)* %outgep1, align 4
+  store i32 %i2, i32 addrspace(1)* %outgep1
   ret void
 }
 
-; FUNC-LABEL: @v_test_umin3_2_uses
-; SI-NOT: v_min3
-define amdgpu_kernel void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
-  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+; GCN-LABEL: {{^}}v_test_umin3_2_uses:
+; GCN-NOT: v_min3
+define amdgpu_kernel void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid2 = mul i32 %tid, 2
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
@@ -91,10 +91,10 @@ define amdgpu_kernel void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrs
   %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2
 
-  %a = load i32, i32 addrspace(1)* %gep0, align 4
-  %b = load i32, i32 addrspace(1)* %gep1, align 4
-  %c = load i32, i32 addrspace(1)* %gep2, align 4
-  %d = load i32, i32 addrspace(1)* %gep3, align 4
+  %a = load i32, i32 addrspace(1)* %gep0
+  %b = load i32, i32 addrspace(1)* %gep1
+  %c = load i32, i32 addrspace(1)* %gep2
+  %d = load i32, i32 addrspace(1)* %gep3
 
   %icmp0 = icmp slt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
@@ -105,7 +105,60 @@ define amdgpu_kernel void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrs
   %icmp2 = icmp slt i32 %i0, %c
   %i2 = select i1 %icmp2, i32 %i0, i32 %c
 
-  store i32 %i2, i32 addrspace(1)* %outgep0, align 4
-  store i32 %i0, i32 addrspace(1)* %outgep1, align 4
+  store i32 %i2, i32 addrspace(1)* %outgep0
+  store i32 %i0, i32 addrspace(1)* %outgep1
   ret void
 }
+
+; GCN-LABEL: {{^}}v_test_imin3_slt_i16:
+; SI: v_min3_i32
+
+; VI: v_min_i16
+; VI: v_min_i16
+
+; GFX9: v_min3_i16
+define amdgpu_kernel void @v_test_imin3_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 addrspace(1)* %cptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()  ; index used for every getelementptr below
+  %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
+  %gep2 = getelementptr i16, i16 addrspace(1)* %cptr, i32 %tid
+  %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
+  %a = load i16, i16 addrspace(1)* %gep0
+  %b = load i16, i16 addrspace(1)* %gep1
+  %c = load i16, i16 addrspace(1)* %gep2
+  %icmp0 = icmp slt i16 %a, %b
+  %i0 = select i1 %icmp0, i16 %a, i16 %b  ; i0 = signed min(a, b)
+  %icmp1 = icmp slt i16 %i0, %c
+  %i1 = select i1 %icmp1, i16 %i0, i16 %c  ; i1 = signed min(i0, c), i.e. the 3-input min under test
+  store i16 %i1, i16 addrspace(1)* %outgep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_test_umin3_ult_i16:
+; SI: v_min3_u32
+
+; VI: v_min_u16
+; VI: v_min_u16
+
+; GFX9: v_min3_u16
+define amdgpu_kernel void @v_test_umin3_ult_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 addrspace(1)* %cptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()  ; index used for every getelementptr below
+  %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
+  %gep2 = getelementptr i16, i16 addrspace(1)* %cptr, i32 %tid
+  %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
+  %a = load i16, i16 addrspace(1)* %gep0
+  %b = load i16, i16 addrspace(1)* %gep1
+  %c = load i16, i16 addrspace(1)* %gep2
+  %icmp0 = icmp ult i16 %a, %b
+  %i0 = select i1 %icmp0, i16 %a, i16 %b  ; i0 = unsigned min(a, b)
+  %icmp1 = icmp ult i16 %i0, %c
+  %i1 = select i1 %icmp1, i16 %i0, i16 %c  ; i1 = unsigned min(i0, c), i.e. the 3-input min under test
+  store i16 %i1, i16 addrspace(1)* %outgep
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
diff --git a/test/CodeGen/AMDGPU/packed-op-sel.ll b/test/CodeGen/AMDGPU/packed-op-sel.ll
new file mode 100644
index 000000000000..6ff0c54c33d0
--- /dev/null
+++ b/test/CodeGen/AMDGPU/packed-op-sel.ll
@@ -0,0 +1,266 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
+
+; GCN-LABEL: {{^}}fma_vector_vector_scalar_lo:
+; GCN: ds_read_b32 [[VEC0:v[0-9]+]]
+; GCN: ds_read_b32 [[VEC1:v[0-9]+]]
+; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]]
+
+; GCN-NOT: pack
+; GCN-NOT: and
+; GCN-NOT: shl
+; GCN-NOT: or
+
+; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0]{{$}}
+define amdgpu_kernel void @fma_vector_vector_scalar_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 {
+bb:  ; A scalar half is splatted to both lanes and used as the addend of a <2 x half> fma.
+  %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1
+
+  %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4
+  %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4
+  %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2
+
+  %scalar0.vec = insertelement <2 x half> undef, half %scalar0, i32 0  ; lane 0 = %scalar0, lane 1 undef
+  %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer  ; all-zero mask: lane 0 copied to both lanes
+
+  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %scalar0.broadcast)
+  store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4
+  ret void
+}
+
+; Apply fneg to the whole vector after the scalar has been broadcast.
+; GCN-LABEL: {{^}}fma_vector_vector_neg_broadcast_scalar_lo:
+; GCN: ds_read_b32 [[VEC0:v[0-9]+]]
+; GCN: ds_read_b32 [[VEC1:v[0-9]+]]
+; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]]
+
+; GCN-NOT: pack
+; GCN-NOT: and
+; GCN-NOT: shl
+; GCN-NOT: or
+
+; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}}
+define amdgpu_kernel void @fma_vector_vector_neg_broadcast_scalar_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 {
+bb:
+  %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1
+
+  %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4
+  %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4
+  %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2
+
+  %scalar0.vec = insertelement <2 x half> undef, half %scalar0, i32 0
+  %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer  ; all-zero mask: lane 0 copied to both lanes
+  %neg.scalar0.broadcast = fsub <2 x half> <half -0.0, half -0.0>, %scalar0.broadcast  ; fsub from -0.0 in each lane is the fneg being tested
+
+  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast)
+  store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4
+  ret void
+}
+
+; Apply fneg to the scalar before it is broadcast.
+; GCN-LABEL: {{^}}fma_vector_vector_neg_scalar_lo:
+; GCN: ds_read_b32 [[VEC0:v[0-9]+]]
+; GCN: ds_read_b32 [[VEC1:v[0-9]+]]
+; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]]
+
+; GCN-NOT: pack
+; GCN-NOT: and
+; GCN-NOT: shl
+; GCN-NOT: or
+
+; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}}
+define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 {
+bb:
+  %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1
+
+  %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4
+  %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4
+  %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2
+
+  %neg.scalar0 = fsub half -0.0, %scalar0  ; scalar fneg, done before the splat
+  %neg.scalar0.vec = insertelement <2 x half> undef, half %neg.scalar0, i32 0
+  %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer  ; all-zero mask: lane 0 copied to both lanes
+
+  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast)
+  store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4
+  ret void
+}
+
+; Apply fneg both before and after the broadcast; the two negations should cancel out.
+; GCN-LABEL: {{^}}fma_vector_vector_neg_broadcast_neg_scalar_lo:
+; GCN: ds_read_b32 [[VEC0:v[0-9]+]]
+; GCN: ds_read_b32 [[VEC1:v[0-9]+]]
+; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]]
+
+; GCN-NOT: pack
+; GCN-NOT: and
+; GCN-NOT: shl
+; GCN-NOT: or
+
+; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0]{{$}}
+define amdgpu_kernel void @fma_vector_vector_neg_broadcast_neg_scalar_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 {
+bb:
+  %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1
+
+  %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4
+  %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4
+  %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2
+
+  %neg.scalar0 = fsub half -0.0, %scalar0  ; first fneg: on the scalar
+  %neg.scalar0.vec = insertelement <2 x half> undef, half %neg.scalar0, i32 0
+  %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer
+  %neg.neg.scalar0.broadcast = fsub <2 x half> <half -0.0, half -0.0>, %neg.scalar0.broadcast  ; second fneg: on the splatted vector
+
+  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.neg.scalar0.broadcast)
+  store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4
+  ret void
+}
+
+; Use the same scalar as the addend in both lanes, but negate only the low lane.
+; GCN-LABEL: {{^}}fma_vector_vector_scalar_neg_lo:
+; GCN: ds_read_b32 [[VEC0:v[0-9]+]]
+; GCN: ds_read_b32 [[VEC1:v[0-9]+]]
+; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]]
+
+; GCN-NOT: pack
+; GCN-NOT: and
+; GCN-NOT: shl
+; GCN-NOT: or
+
+; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0] neg_lo:[0,0,1]{{$}}
+define amdgpu_kernel void @fma_vector_vector_scalar_neg_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 {
+bb:
+  %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1
+
+  %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4
+  %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4
+  %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2
+
+  %neg.scalar0 = fsub half -0.0, %scalar0
+  %neg.scalar0.vec = insertelement <2 x half> undef, half %neg.scalar0, i32 0  ; lane 0 = -%scalar0
+  %neg.scalar0.scalar0 = insertelement <2 x half> %neg.scalar0.vec, half %scalar0, i32 1  ; lane 1 = %scalar0
+  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.scalar0)
+  store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4
+  ret void
+}
+
+; Use the same scalar as the addend in both lanes, but negate only the high lane.
+; GCN-LABEL: {{^}}fma_vector_vector_scalar_neg_hi:
+; GCN: ds_read_b32 [[VEC0:v[0-9]+]]
+; GCN: ds_read_b32 [[VEC1:v[0-9]+]]
+; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]]
+
+; GCN-NOT: pack
+; GCN-NOT: and
+; GCN-NOT: shl
+; GCN-NOT: or
+
+; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0] neg_hi:[0,0,1]{{$}}
+define amdgpu_kernel void @fma_vector_vector_scalar_neg_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 {
+bb:
+  %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1
+
+  %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4
+  %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4
+  %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2
+
+  %neg.scalar0 = fsub half -0.0, %scalar0
+  %neg.scalar0.vec = insertelement <2 x half> undef, half %scalar0, i32 0  ; NOTE: despite the name, lane 0 holds the un-negated %scalar0
+  %scalar0.neg.scalar0 = insertelement <2 x half> %neg.scalar0.vec, half %neg.scalar0, i32 1  ; lane 1 = -%scalar0
+  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %scalar0.neg.scalar0)
+  store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4
+  ret void
+}
+
+; Apply fneg to the scalar, bitcast it to i16, then broadcast it into an integer vector add.
+; GCN-LABEL: {{^}}add_vector_neg_bitcast_scalar_lo:
+; GCN: ds_read_b32 [[VEC0:v[0-9]+]]
+; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]]
+
+; GCN-NOT: pack
+; GCN-NOT: and
+; GCN-NOT: shl
+; GCN-NOT: or
+
+; GCN: v_xor_b32_e32 [[NEG_SCALAR0:v[0-9]+]], 0x8000, [[SCALAR0]]
+; GCN-NEXT: v_pk_add_u16 v{{[0-9]+}}, [[VEC0]], [[NEG_SCALAR0]] op_sel_hi:[1,0]{{$}}
+define amdgpu_kernel void @add_vector_neg_bitcast_scalar_lo(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 {
+bb:
+  %vec0 = load volatile <2 x i16>, <2 x i16> addrspace(3)* %lds, align 4
+  %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2
+  %neg.scalar0 = fsub half -0.0, %scalar0  ; floating-point negate of the scalar
+  %neg.scalar0.bc = bitcast half %neg.scalar0 to i16  ; reinterpret the negated half as an integer
+
+  %neg.scalar0.vec = insertelement <2 x i16> undef, i16 %neg.scalar0.bc, i32 0
+  %neg.scalar0.broadcast = shufflevector <2 x i16> %neg.scalar0.vec, <2 x i16> undef, <2 x i32> zeroinitializer  ; all-zero mask: lane 0 copied to both lanes
+
+  %result = add <2 x i16> %vec0, %neg.scalar0.broadcast
+  store <2 x i16> %result, <2 x i16> addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}fma_vector_vector_scalar_lo_neg_scalar_hi:
+; GCN: ds_read_b32 [[VEC0:v[0-9]+]]
+; GCN: ds_read_b32 [[VEC1:v[0-9]+]]
+; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]]
+; GCN: ds_read_u16 [[SCALAR1:v[0-9]+]]
+
+; FIXME: The v_and_b32 checked below should not be needed; remove it.
+; GCN: v_and_b32_e32 [[SCALAR0]], 0xffff, [[SCALAR0]]
+; GCN: v_xor_b32_e32 [[SCALAR1]], 0x8000, [[SCALAR1]]
+; GCN: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[SCALAR1]], 16, [[SCALAR0]]
+
+; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[PACKED]]{{$}}
+define amdgpu_kernel void @fma_vector_vector_scalar_lo_neg_scalar_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 {
+bb:  ; The fma addend is built from two different scalars: %scalar0 in lane 0, -%scalar1 in lane 1.
+  %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1
+  %arg2.gep = getelementptr inbounds half, half addrspace(3)* %arg2, i32 2
+
+  %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4
+  %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4
+
+  %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2
+  %scalar1 = load volatile half, half addrspace(3)* %arg2.gep, align 2
+
+  %neg.scalar1 = fsub half -0.0, %scalar1  ; only the second scalar is negated
+  %vec.ins0 = insertelement <2 x half> undef, half %scalar0, i32 0
+  %vec2 = insertelement <2 x half> %vec.ins0, half %neg.scalar1, i32 1
+  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2)
+  store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}fma_vector_vector_neg_scalar_lo_scalar_hi:
+; GCN: ds_read_b32 [[VEC0:v[0-9]+]]
+; GCN: ds_read_b32 [[VEC1:v[0-9]+]]
+; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]]
+; GCN: ds_read_u16 [[SCALAR1:v[0-9]+]]
+
+; FIXME: The v_and_b32 checked below should not be needed; remove it.
+; GCN: v_and_b32_e32 [[SCALAR0]], 0xffff, [[SCALAR0]]
+; GCN: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[SCALAR1]], 16, [[SCALAR0]]
+
+; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[PACKED]] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}}
+define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 {
+bb:  ; The fma addend is <%scalar0, %scalar1>, negated as a whole vector after it is built.
+  %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1
+  %arg2.gep = getelementptr inbounds half, half addrspace(3)* %arg2, i32 2
+
+  %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4
+  %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4
+
+  %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2
+  %scalar1 = load volatile half, half addrspace(3)* %arg2.gep, align 2
+
+  %vec.ins0 = insertelement <2 x half> undef, half %scalar0, i32 0
+  %vec2 = insertelement <2 x half> %vec.ins0, half %scalar1, i32 1
+  %neg.vec2 = fsub <2 x half> <half -0.0, half -0.0>, %vec2  ; vector fneg of both lanes
+
+  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.vec2)
+  store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4
+  ret void
+}
+
+declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
index 8403dd991360..777eccb00b02 100644
--- a/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
+++ b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
@@ -20,7 +20,7 @@ bb3:                                              ; preds = %bb, %entry
 
 bb8:                                              ; preds = %bb3
   %1 = getelementptr inbounds i8, i8* %0, i32 0
-  store i8 0, i8* %1, align 1
+  store volatile i8 0, i8* %1, align 1
   %2 = call i32 @ptou() nounwind
   ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
   ; CHECK-NOT: [[REGISTER]],
@@ -35,7 +35,7 @@ bb8:                                              ; preds = %bb3
   %7 = or i8 %6, 48
   %8 = add i8 %6, 87
   %iftmp.5.0.1 = select i1 %5, i8 %7, i8 %8
-  store i8 %iftmp.5.0.1, i8* %p8, align 1
+  store volatile i8 %iftmp.5.0.1, i8* %p8, align 1
   ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
   ; CHECK-NOT: [[REGISTER]],
   ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -49,7 +49,7 @@ bb8:                                              ; preds = %bb3
   %13 = or i8 %12, 48
   %14 = add i8 %12, 87
   %iftmp.5.0.2 = select i1 %11, i8 %13, i8 %14
-  store i8 %iftmp.5.0.2, i8* %p8, align 1
+  store volatile i8 %iftmp.5.0.2, i8* %p8, align 1
   ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
   ; CHECK-NOT: [[REGISTER]],
   ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -63,7 +63,7 @@ bb8:                                              ; preds = %bb3
   %19 = or i8 %18, 48
   %20 = add i8 %18, 87
   %iftmp.5.0.4 = select i1 %17, i8 %19, i8 %20
-  store i8 %iftmp.5.0.4, i8* null, align 1
+  store volatile i8 %iftmp.5.0.4, i8* null, align 1
   ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
   ; CHECK-NOT: [[REGISTER]],
   ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -74,7 +74,7 @@ bb8:                                              ; preds = %bb3
   %22 = urem i32 %21, 10
   %23 = icmp ult i32 %22, 10
   %iftmp.5.0.5 = select i1 %23, i8 0, i8 %val8
-  store i8 %iftmp.5.0.5, i8* %p8, align 1
+  store volatile i8 %iftmp.5.0.5, i8* %p8, align 1
   ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
   ; CHECK-NOT: [[REGISTER]],
   ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -88,7 +88,7 @@ bb8:                                              ; preds = %bb3
   %28 = or i8 %27, 48
   %29 = add i8 %27, 87
   %iftmp.5.0.6 = select i1 %26, i8 %28, i8 %29
-  store i8 %iftmp.5.0.6, i8* %p8, align 1
+  store volatile i8 %iftmp.5.0.6, i8* %p8, align 1
   ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
   ; CHECK-NOT: [[REGISTER]],
   ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -102,7 +102,7 @@ bb8:                                              ; preds = %bb3
   %34 = or i8 %33, 48
   %35 = add i8 %33, 87
   %iftmp.5.0.7 = select i1 %32, i8 %34, i8 %35
-  store i8 %iftmp.5.0.7, i8* %p8, align 1
+  store volatile i8 %iftmp.5.0.7, i8* %p8, align 1
   ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
   ; CHECK-NOT: [[REGISTER]],
   ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -116,7 +116,7 @@ bb8:                                              ; preds = %bb3
   %40 = or i8 %39, 48
   %41 = add i8 %39, 87
   %iftmp.5.0.8 = select i1 %38, i8 %40, i8 %41
-  store i8 %iftmp.5.0.8, i8* null, align 1
+  store volatile i8 %iftmp.5.0.8, i8* null, align 1
   br label %bb46
 
 bb46:                                             ; preds = %bb3
diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
index 2a5af6199a34..954860219d19 100644
--- a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
+++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
@@ -13,7 +13,7 @@ entry:
 ; CHECK: sub       sp, sp, #12
 ; CHECK: sub       sp, sp, #4
 ; CHECK: add       r0, sp, #4
-; CHECK: stm       sp, {r0, r1, r2, r3}
+; CHECK: stmib     sp, {r1, r2, r3}
   %g = alloca i8*
   %g1 = bitcast i8** %g to i8*
   call void @llvm.va_start(i8* %g1)
diff --git a/test/CodeGen/ARM/dag-combine-ldst.ll b/test/CodeGen/ARM/dag-combine-ldst.ll
index c1960ee6c6e9..077754ef013d 100644
--- a/test/CodeGen/ARM/dag-combine-ldst.ll
+++ b/test/CodeGen/ARM/dag-combine-ldst.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL:   {{^}}main
 ; CHECK:         mov [[TMP:r[0-9]+]], #0
 ; CHECK-NEXT:    str [[TMP]], [sp, #4]
-; CHECK-NEXT:    str [[TMP]], [sp]
+; CHECK_O0:      str [[TMP]], [sp]
 ; CHECK_O0:      ldr [[TMP:r[0-9]+]], [sp]
 ; CHECK_O0-NEXT: add [[TMP]], [[TMP]], #2
 ; CHECK_O1-NOT:  ldr [[TMP:r[0-9]+]], [sp]
diff --git a/test/CodeGen/MSP430/vararg.ll b/test/CodeGen/MSP430/vararg.ll
index 6c8bceff5de9..a708b89cbd8f 100644
--- a/test/CodeGen/MSP430/vararg.ll
+++ b/test/CodeGen/MSP430/vararg.ll
@@ -25,7 +25,6 @@ define i16 @va_arg(i8* %vl) nounwind {
 entry:
 ; CHECK-LABEL: va_arg:
   %vl.addr = alloca i8*, align 2
-; CHECK: mov.w r12, 0(r1)
   store i8* %vl, i8** %vl.addr, align 2
 ; CHECK: mov.w r12, [[REG:r[0-9]+]]
 ; CHECK-NEXT: add.w #2, [[REG]]
diff --git a/test/CodeGen/Mips/msa/bmzi_bmnzi.ll b/test/CodeGen/Mips/msa/bmzi_bmnzi.ll
index d1cb3c348c73..de62dcd69403 100644
--- a/test/CodeGen/Mips/msa/bmzi_bmnzi.ll
+++ b/test/CodeGen/Mips/msa/bmzi_bmnzi.ll
@@ -9,9 +9,9 @@ entry:
   %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG1
   %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 240)
-  store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
+  store volatile <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
   %3 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 15)
-  store <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
+  store volatile <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
   %4 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 170)
   store <16 x i8> %4, <16 x i8>* @llvm_mips_bmnzi_b_RES
   ret void
@@ -32,9 +32,9 @@ entry:
   %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG1
   %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 240)
-  store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
+  store volatile <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
   %3 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 15)
-  store <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
+  store volatile <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
   %4 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 170)
   store <16 x i8> %4, <16 x i8>* @llvm_mips_bmnzi_b_RES
   ret void
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 18715ddb37c6..2039c1f57f17 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
-; RUN: llc < %s -march=ppc64le -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
-; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s
-; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P8U
+; RUN: llc < %s -ppc-asm-full-reg-names -march=ppc64 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
+; RUN: llc < %s -ppc-asm-full-reg-names -march=ppc64le -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
+; RUN: llc < %s -ppc-asm-full-reg-names -march=ppc64 -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -ppc-asm-full-reg-names -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P8U
 
 define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
 ; CHECK-LABEL: exchange_and_add:
@@ -108,8 +108,10 @@ entry:
 ; CHECK: @atomic_load
   %tmp = load atomic i64, i64* %mem acquire, align 64
 ; CHECK-NOT: ldarx
-; CHECK: ld
-; CHECK: lwsync
+; CHECK: ld [[VAL:r[0-9]+]]
+; CHECK: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]]
+; CHECK: bne- [[CR]], .+4
+; CHECK: isync
   ret i64 %tmp
 }
 
diff --git a/test/CodeGen/PowerPC/atomics-indexed.ll b/test/CodeGen/PowerPC/atomics-indexed.ll
index 7a0dde034d68..cfe15f0061c4 100644
--- a/test/CodeGen/PowerPC/atomics-indexed.ll
+++ b/test/CodeGen/PowerPC/atomics-indexed.ll
@@ -10,16 +10,22 @@
 define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) {
 ; CHECK-LABEL: load_x_i8_seq_cst
 ; CHECK: sync
-; CHECK: lbzx
-; CHECK: lwsync
+; CHECK: lbzx [[VAL:r[0-9]+]]
+; CHECK-PPC32: lwsync
+; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]]
+; CHECK-PPC64: bne- [[CR]], .+4
+; CHECK-PPC64: isync
   %ptr = getelementptr inbounds [100000 x i8], [100000 x i8]* %mem, i64 0, i64 90000
   %val = load atomic i8, i8* %ptr seq_cst, align 1
   ret i8 %val
 }
 define i16 @load_x_i16_acquire([100000 x i16]* %mem) {
 ; CHECK-LABEL: load_x_i16_acquire
-; CHECK: lhzx
-; CHECK: lwsync
+; CHECK: lhzx [[VAL:r[0-9]+]]
+; CHECK-PPC32: lwsync
+; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]]
+; CHECK-PPC64: bne- [[CR]], .+4
+; CHECK-PPC64: isync
   %ptr = getelementptr inbounds [100000 x i16], [100000 x i16]* %mem, i64 0, i64 90000
   %val = load atomic i16, i16* %ptr acquire, align 2
   ret i16 %val
diff --git a/test/CodeGen/PowerPC/atomics-regression.ll b/test/CodeGen/PowerPC/atomics-regression.ll
index 9af82b625532..054d3a4146b0 100644
--- a/test/CodeGen/PowerPC/atomics-regression.ll
+++ b/test/CodeGen/PowerPC/atomics-regression.ll
@@ -23,7 +23,9 @@ define i8 @test2(i8* %ptr) {
 ; PPC64LE-LABEL: test2:
 ; PPC64LE:       # BB#0:
 ; PPC64LE-NEXT:    lbz 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i8, i8* %ptr acquire, align 1
   ret i8 %val
@@ -35,7 +37,9 @@ define i8 @test3(i8* %ptr) {
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    ori 2, 2, 0
 ; PPC64LE-NEXT:    lbz 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i8, i8* %ptr seq_cst, align 1
   ret i8 %val
@@ -63,7 +67,9 @@ define i16 @test6(i16* %ptr) {
 ; PPC64LE-LABEL: test6:
 ; PPC64LE:       # BB#0:
 ; PPC64LE-NEXT:    lhz 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i16, i16* %ptr acquire, align 2
   ret i16 %val
@@ -75,7 +81,9 @@ define i16 @test7(i16* %ptr) {
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    ori 2, 2, 0
 ; PPC64LE-NEXT:    lhz 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i16, i16* %ptr seq_cst, align 2
   ret i16 %val
@@ -103,7 +111,9 @@ define i32 @test10(i32* %ptr) {
 ; PPC64LE-LABEL: test10:
 ; PPC64LE:       # BB#0:
 ; PPC64LE-NEXT:    lwz 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i32, i32* %ptr acquire, align 4
   ret i32 %val
@@ -115,7 +125,9 @@ define i32 @test11(i32* %ptr) {
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    ori 2, 2, 0
 ; PPC64LE-NEXT:    lwz 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i32, i32* %ptr seq_cst, align 4
   ret i32 %val
@@ -143,7 +155,9 @@ define i64 @test14(i64* %ptr) {
 ; PPC64LE-LABEL: test14:
 ; PPC64LE:       # BB#0:
 ; PPC64LE-NEXT:    ld 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i64, i64* %ptr acquire, align 8
   ret i64 %val
@@ -155,7 +169,9 @@ define i64 @test15(i64* %ptr) {
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    ori 2, 2, 0
 ; PPC64LE-NEXT:    ld 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i64, i64* %ptr seq_cst, align 8
   ret i64 %val
@@ -9544,3 +9560,35 @@ define i64 @test559(i64* %ptr, i64 %val) {
   %ret = atomicrmw umin i64* %ptr, i64 %val singlethread seq_cst
   ret i64 %ret
 }
+
+; The plain load that follows the acquire load must never be scheduled before the isync.
+define i32 @test_ordering0(i32* %ptr1, i32* %ptr2) {
+; PPC64LE-LABEL: test_ordering0:
+; PPC64LE:       # BB#0:
+; PPC64LE-NEXT:    lwz 4, 0(3)
+; PPC64LE-NEXT:    cmpw 7, 4, 4
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
+; PPC64LE-NEXT:    lwz 3, 0(3)
+; PPC64LE-NEXT:    add 3, 4, 3
+; PPC64LE-NEXT:    blr
+  %val1 = load atomic i32, i32* %ptr1 acquire, align 4
+  %val2 = load i32, i32* %ptr1  ; NOTE(review): re-reads %ptr1; %ptr2 is never used - confirm this is intended
+  %add = add i32 %val1, %val2
+  ret i32 %add
+}
+
+; The store that follows the acquire load must never be scheduled before the isync.
+define i32 @test_ordering1(i32* %ptr1, i32 %val1, i32* %ptr2) {
+; PPC64LE-LABEL: test_ordering1:
+; PPC64LE:       # BB#0:
+; PPC64LE-NEXT:    lwz 3, 0(3)
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
+; PPC64LE-NEXT:    stw 4, 0(5)
+; PPC64LE-NEXT:    blr
+  %val2 = load atomic i32, i32* %ptr1 acquire, align 4
+  store i32 %val1, i32* %ptr2  ; plain store to a different pointer than the acquire load
+  ret i32 %val2
+}
diff --git a/test/CodeGen/PowerPC/atomics.ll b/test/CodeGen/PowerPC/atomics.ll
index 2e1eff0f634d..61d54534f5fd 100644
--- a/test/CodeGen/PowerPC/atomics.ll
+++ b/test/CodeGen/PowerPC/atomics.ll
@@ -25,9 +25,12 @@ define i16 @load_i16_monotonic(i16* %mem) {
 }
 define i32 @load_i32_acquire(i32* %mem) {
 ; CHECK-LABEL: load_i32_acquire
-; CHECK: lwz
+; CHECK: lwz [[VAL:r[0-9]+]]
   %val = load atomic i32, i32* %mem acquire, align 4
-; CHECK: lwsync
+; CHECK-PPC32: lwsync
+; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]]
+; CHECK-PPC64: bne- [[CR]], .+4
+; CHECK-PPC64: isync
   ret i32 %val
 }
 define i64 @load_i64_seq_cst(i64* %mem) {
@@ -35,9 +38,12 @@ define i64 @load_i64_seq_cst(i64* %mem) {
 ; CHECK: sync
 ; PPC32: __sync_
 ; PPC64-NOT: __sync_
-; PPC64: ld
+; PPC64: ld [[VAL:r[0-9]+]]
   %val = load atomic i64, i64* %mem seq_cst, align 8
-; CHECK: lwsync
+; CHECK-PPC32: lwsync
+; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]]
+; CHECK-PPC64: bne- [[CR]], .+4
+; CHECK-PPC64: isync
   ret i64 %val
 }
 
diff --git a/test/CodeGen/PowerPC/ppcf128sf.ll b/test/CodeGen/PowerPC/ppcf128sf.ll
index 6804b551e572..fde7d48da7c2 100644
--- a/test/CodeGen/PowerPC/ppcf128sf.ll
+++ b/test/CodeGen/PowerPC/ppcf128sf.ll
@@ -14,19 +14,19 @@ entry:
   %0 = load ppc_fp128, ppc_fp128* @ld, align 16
   %1 = load ppc_fp128, ppc_fp128* @ld2, align 16
   %add = fadd ppc_fp128 %0, %1
-  store ppc_fp128 %add, ppc_fp128* %c, align 16
+  store volatile ppc_fp128 %add, ppc_fp128* %c, align 16
   %2 = load ppc_fp128, ppc_fp128* @ld, align 16
   %3 = load ppc_fp128, ppc_fp128* @ld2, align 16
   %sub = fsub ppc_fp128 %2, %3
-  store ppc_fp128 %sub, ppc_fp128* %c, align 16
+  store volatile ppc_fp128 %sub, ppc_fp128* %c, align 16
   %4 = load ppc_fp128, ppc_fp128* @ld, align 16
   %5 = load ppc_fp128, ppc_fp128* @ld2, align 16
   %mul = fmul ppc_fp128 %4, %5
-  store ppc_fp128 %mul, ppc_fp128* %c, align 16
+  store volatile ppc_fp128 %mul, ppc_fp128* %c, align 16
   %6 = load ppc_fp128, ppc_fp128* @ld, align 16
   %7 = load ppc_fp128, ppc_fp128* @ld2, align 16
   %div = fdiv ppc_fp128 %6, %7
-  store ppc_fp128 %div, ppc_fp128* %c, align 16
+  store volatile ppc_fp128 %div, ppc_fp128* %c, align 16
   ret void
 
   ; CHECK-LABEL:    __gcc_qadd
diff --git a/test/CodeGen/PowerPC/save-bp.ll b/test/CodeGen/PowerPC/save-bp.ll
new file mode 100644
index 000000000000..1c7e19a1d5cb
--- /dev/null
+++ b/test/CodeGen/PowerPC/save-bp.ll
@@ -0,0 +1,54 @@
+; RUN: llc -march=ppc64 -ppc-always-use-base-pointer < %s | FileCheck %s --check-prefix CHECK --check-prefix PPC64
+; RUN: llc -march=ppc32 -ppc-always-use-base-pointer < %s | FileCheck %s --check-prefix CHECK --check-prefix PPC32
+; RUN: llc -march=ppc32 -ppc-always-use-base-pointer -relocation-model pic < %s | FileCheck %s --check-prefix CHECK --check-prefix PPC32PIC
+
+; CHECK-LABEL: fred:
+
+; Check for saving/restoring frame pointer (X31) and base pointer (X30)
+; on ppc64:
+; PPC64: std 31, -8(1)
+; PPC64: std 30, -16(1)
+; PPC64: ld 31, -8(1)
+; PPC64: ld 30, -16(1)
+
+; Check for saving/restoring frame pointer (R31) and base pointer (R30)
+; on ppc32:
+; PPC32: stwux 1, 1, 0
+; PPC32: addic 0, 0, -4
+; PPC32: stwx 31, 0, 0
+; PPC32: addic 0, 0, -4
+; PPC32: stwx 30, 0, 0
+; The restore sequence:
+; PPC32: lwz 31, 0(1)
+; PPC32: addic 30, 0, 8
+; PPC32: lwz 0, -4(31)
+; PPC32: lwz 30, -8(31)
+; PPC32: mr 1, 31
+; PPC32: mr 31, 0
+
+; Check for saving/restoring frame pointer (R31) and base pointer (R29)
+; on ppc32/pic. This is mostly the same as without pic, except that base
+; pointer is in R29.
+; PPC32PIC: stwux 1, 1, 0
+; PPC32PIC: addic 0, 0, -4
+; PPC32PIC: stwx 31, 0, 0
+; PPC32PIC: addic 0, 0, -8
+; PPC32PIC: stwx 29, 0, 0
+; The restore sequence:
+; PPC32PIC: lwz 31, 0(1)
+; PPC32PIC: addic 29, 0, 12
+; PPC32PIC: lwz 0, -4(31)
+; PPC32PIC: lwz 29, -12(31)
+; PPC32PIC: mr 1, 31
+; PPC32PIC: mr 31, 0
+
+
+target datalayout = "E-m:e-p:32:32-i64:64-n32"
+target triple = "powerpc-unknown-freebsd"
+
+define i64 @fred() local_unnamed_addr #0 {
+entry:
+  ret i64 0
+}
+
+attributes #0 = { norecurse readnone nounwind sspstrong "no-frame-pointer-elim"="true" "target-cpu"="ppc" }
diff --git a/test/CodeGen/PowerPC/save-cr-ppc32svr4.ll b/test/CodeGen/PowerPC/save-cr-ppc32svr4.ll
new file mode 100644
index 000000000000..9fabca186050
--- /dev/null
+++ b/test/CodeGen/PowerPC/save-cr-ppc32svr4.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=ppc32 -relocation-model pic < %s | FileCheck %s
+;
+; Make sure that the CR register is saved correctly on PPC32/SVR4.
+
+; CHECK-LABEL: fred:
+; CHECK: stwu 1, -32(1)
+; CHECK: stw 31, 28(1)
+; CHECK: mr 31, 1
+; CHECK: stw 30, 24(1)
+; CHECK: mfcr [[CR:[0-9]+]]
+; CHECK: stw [[CR]], 20(31)
+
+target datalayout = "E-m:e-p:32:32-i64:64-n32"
+target triple = "powerpc-unknown-freebsd"
+
+; Function Attrs: norecurse nounwind readnone sspstrong
+define i64 @fred(double %a0) local_unnamed_addr #0 {
+b1:
+  %v2 = fcmp olt double %a0, 0x43E0000000000000
+  br i1 %v2, label %b3, label %b7
+
+b3:                                               ; preds = %b1
+  %v4 = fcmp olt double %a0, 0xC3E0000000000000
+  %v5 = fptosi double %a0 to i64
+  %v6 = select i1 %v4, i64 -9223372036854775808, i64 %v5
+  br label %b14
+
+b7:                                               ; preds = %b1
+  %v8 = fcmp olt double %a0, 0x43F0000000000000
+  br i1 %v8, label %b9, label %b11
+
+b9:                                               ; preds = %b7
+  %v10 = fptoui double %a0 to i64
+  br label %b14
+
+b11:                                              ; preds = %b7
+  %v12 = fcmp ogt double %a0, 0.000000e+00
+  %v13 = sext i1 %v12 to i64
+  br label %b14
+
+b14:                                              ; preds = %b11, %b9, %b3
+  %v15 = phi i64 [ %v6, %b3 ], [ %v10, %b9 ], [ %v13, %b11 ]
+  ret i64 %v15
+}
+
+attributes #0 = { norecurse nounwind readnone sspstrong "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "stack-protector-buffer-size"="8" "target-cpu"="ppc" }
diff --git a/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll b/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll
new file mode 100644
index 000000000000..b7b3c1ada965
--- /dev/null
+++ b/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll
@@ -0,0 +1,57 @@
+; RUN: llc -march=ppc32 -relocation-model pic < %s | FileCheck %s
+
+; CHECK-LABEL: fred
+; CHECK: stwux 1, 1, 0
+; Save R31..R29 via R0:
+; CHECK: addic 0, 0, -4
+; CHECK: stwx 31, 0, 0
+; CHECK: addic 0, 0, -4
+; CHECK: stwx 30, 0, 0
+; CHECK: addic 0, 0, -4
+; CHECK: stwx 29, 0, 0
+; Set R29 back to the value of R0 from before the updates:
+; CHECK: addic 29, 0, 12
+; Save CR through R12 using R29 as the stack pointer (aligned base pointer).
+; CHECK: mfcr 12
+; CHECK: stw 28, -16(29)
+; CHECK: stw 12, -20(29)
+
+target datalayout = "E-m:e-p:32:32-i64:64-n32"
+target triple = "powerpc-unknown-freebsd"
+
+; Function Attrs: norecurse readnone sspstrong
+define i64 @fred(double %a0) local_unnamed_addr #0 {
+b1:
+  %v2 = alloca i64, align 128
+  store i64 0, i64* %v2
+  %v3 = fcmp olt double %a0, 0x43E0000000000000
+  br i1 %v3, label %b4, label %b8
+
+b4:                                               ; preds = %b1
+  %v5 = fcmp olt double %a0, 0xC3E0000000000000
+  %v6 = fptosi double %a0 to i64
+  store i64 %v6, i64* %v2
+  %v7 = select i1 %v5, i64 -9223372036854775808, i64 %v6
+  br label %b15
+
+b8:                                               ; preds = %b1
+  %v9 = fcmp olt double %a0, 0x43F0000000000000
+  br i1 %v9, label %b10, label %b12
+
+b10:                                              ; preds = %b8
+  %v11 = fptoui double %a0 to i64
+  br label %b15
+
+b12:                                              ; preds = %b8
+  %v13 = fcmp ogt double %a0, 0.000000e+00
+  %v14 = sext i1 %v13 to i64
+  br label %b15
+
+b15:                                              ; preds = %b12, %b10, %b4
+  %v16 = phi i64 [ %v7, %b4 ], [ %v11, %b10 ], [ %v14, %b12 ]
+  %v17 = load i64, i64* %v2
+  %v18 = add i64 %v17, %v16
+  ret i64 %v18
+}
+
+attributes #0 = { norecurse readnone sspstrong "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "stack-protector-buffer-size"="8" "target-cpu"="ppc" }
diff --git a/test/CodeGen/SPARC/32abi.ll b/test/CodeGen/SPARC/32abi.ll
index 09e7a3a09d86..3807f84d4e92 100644
--- a/test/CodeGen/SPARC/32abi.ll
+++ b/test/CodeGen/SPARC/32abi.ll
@@ -25,17 +25,17 @@ define void @intarg(i8  %a0,   ; %i0
                     i32 %a5,   ; %i5
                     i32 signext %a6,   ; [%fp+92]
                     i8* %a7) { ; [%fp+96]
-  store i8 %a0, i8* %a4
-  store i8 %a1, i8* %a4
+  store volatile i8 %a0, i8* %a4
+  store volatile i8 %a1, i8* %a4
   %p16 = bitcast i8* %a4 to i16*
-  store i16 %a2, i16* %p16
+  store volatile i16 %a2, i16* %p16
   %p32 = bitcast i8* %a4 to i32*
-  store i32 %a3, i32* %p32
+  store volatile i32 %a3, i32* %p32
   %pp = bitcast i8* %a4 to i8**
-  store i8* %a4, i8** %pp
-  store i32 %a5, i32* %p32
-  store i32 %a6, i32* %p32
-  store i8* %a7, i8** %pp
+  store volatile i8* %a4, i8** %pp
+  store volatile i32 %a5, i32* %p32
+  store volatile i32 %a6, i32* %p32
+  store volatile i8* %a7, i8** %pp
   ret void
 }
 
diff --git a/test/CodeGen/SPARC/64abi.ll b/test/CodeGen/SPARC/64abi.ll
index b963be2e9853..771cc409554b 100644
--- a/test/CodeGen/SPARC/64abi.ll
+++ b/test/CodeGen/SPARC/64abi.ll
@@ -24,17 +24,17 @@ define void @intarg(i8  %a0,   ; %i0
                     i32 %a5,   ; %i5
                     i32 signext %a6,   ; [%fp+BIAS+176]
                     i8* %a7) { ; [%fp+BIAS+184]
-  store i8 %a0, i8* %a4
-  store i8 %a1, i8* %a4
+  store volatile i8 %a0, i8* %a4
+  store volatile i8 %a1, i8* %a4
   %p16 = bitcast i8* %a4 to i16*
-  store i16 %a2, i16* %p16
+  store volatile i16 %a2, i16* %p16
   %p32 = bitcast i8* %a4 to i32*
-  store i32 %a3, i32* %p32
+  store volatile i32 %a3, i32* %p32
   %pp = bitcast i8* %a4 to i8**
-  store i8* %a4, i8** %pp
-  store i32 %a5, i32* %p32
-  store i32 %a6, i32* %p32
-  store i8* %a7, i8** %pp
+  store volatile i8* %a4, i8** %pp
+  store volatile i32 %a5, i32* %p32
+  store volatile i32 %a6, i32* %p32
+  store volatile i8* %a7, i8** %pp
   ret void
 }
 
@@ -316,7 +316,7 @@ define void @call_ret_i64_pair(i64* %i0) {
   %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef,
                                         i64* undef, i64* undef)
   %e0 = extractvalue { i64, i64 } %rv, 0
-  store i64 %e0, i64* %i0
+  store volatile i64 %e0, i64* %i0
   %e1 = extractvalue { i64, i64 } %rv, 1
   store i64 %e1, i64* %i0
   ret void
diff --git a/test/CodeGen/SystemZ/swift-return.ll b/test/CodeGen/SystemZ/swift-return.ll
index 69d0e979190c..977816f66bec 100644
--- a/test/CodeGen/SystemZ/swift-return.ll
+++ b/test/CodeGen/SystemZ/swift-return.ll
@@ -189,11 +189,11 @@ define void @consume_i1_ret() {
   %v6 = extractvalue { i1, i1, i1, i1 } %call, 2
   %v7 = extractvalue { i1, i1, i1, i1 } %call, 3
   %val = zext i1 %v3 to i32
-  store i32 %val, i32* @var
+  store volatile i32 %val, i32* @var
   %val2 = zext i1 %v5 to i32
-  store i32 %val2, i32* @var
+  store volatile i32 %val2, i32* @var
   %val3 = zext i1 %v6 to i32
-  store i32 %val3, i32* @var
+  store volatile i32 %val3, i32* @var
   %val4 = zext i1 %v7 to i32
   store i32 %val4, i32* @var
   ret void
diff --git a/test/CodeGen/Thumb/stack-access.ll b/test/CodeGen/Thumb/stack-access.ll
index 44217aba62d5..533559a67421 100644
--- a/test/CodeGen/Thumb/stack-access.ll
+++ b/test/CodeGen/Thumb/stack-access.ll
@@ -7,13 +7,13 @@ define void @test1(i8** %p) {
   %z = alloca i8, align 1
 ; CHECK: add r1, sp, #8
 ; CHECK: str r1, [r0]
-  store i8* %x, i8** %p, align 4
+  store volatile i8* %x, i8** %p, align 4
 ; CHECK: add r1, sp, #4
 ; CHECK: str r1, [r0]
-  store i8* %y, i8** %p, align 4
+  store volatile i8* %y, i8** %p, align 4
 ; CHECK: mov r1, sp
 ; CHECK: str r1, [r0]
-  store i8* %z, i8** %p, align 4
+  store volatile i8* %z, i8** %p, align 4
   ret void
 }
 
@@ -24,10 +24,10 @@ define void @test2([1024 x i8]** %p) {
 ; CHECK: add r1, sp, #1020
 ; CHECK: adds r1, #4
 ; CHECK: str r1, [r0]
-  store [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
+  store volatile [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
 ; CHECK: mov r1, sp
 ; CHECK: str r1, [r0]
-  store [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
+  store volatile [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
   ret void
 }
 
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
index 3e4bd02097ad..c6d00d4c1e11 100644
--- a/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -50,9 +50,9 @@ bb420:                                            ; preds = %bb20, %bb20
 ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
 ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
 ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
-  store %union.rec* null, %union.rec** @zz_hold, align 4
+  store volatile %union.rec* null, %union.rec** @zz_hold, align 4
   store %union.rec* null, %union.rec** @zz_res, align 4
-  store %union.rec* %x, %union.rec** @zz_hold, align 4
+  store volatile %union.rec* %x, %union.rec** @zz_hold, align 4
   %0 = call  %union.rec* @Manifest(%union.rec* undef, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind ; <%union.rec*> [#uses=0]
   unreachable
 
diff --git a/test/CodeGen/X86/GlobalISel/add-scalar.ll b/test/CodeGen/X86/GlobalISel/add-scalar.ll
index 553bc2789ff0..85db1c0e7e7a 100644
--- a/test/CodeGen/X86/GlobalISel/add-scalar.ll
+++ b/test/CodeGen/X86/GlobalISel/add-scalar.ll
@@ -1,44 +1,94 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
+; RUN: llc -mtriple=i386-linux-gnu   -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32
 
 define i64 @test_add_i64(i64 %arg1, i64 %arg2) {
-; ALL-LABEL: test_add_i64:
-; ALL:       # BB#0:
-; ALL-NEXT:    leaq (%rsi,%rdi), %rax
-; ALL-NEXT:    retq
+; X64-LABEL: test_add_i64:
+; X64:       # BB#0:
+; X64-NEXT:    leaq (%rsi,%rdi), %rax
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_add_i64:
+; X32:       # BB#0:
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:  .Lcfi0:
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:  .Lcfi1:
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:  .Lcfi2:
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    pushl %esi
+; X32-NEXT:  .Lcfi3:
+; X32-NEXT:    .cfi_offset %esi, -12
+; X32-NEXT:    leal 8(%ebp), %ecx
+; X32-NEXT:    leal 12(%ebp), %esi
+; X32-NEXT:    leal 16(%ebp), %eax
+; X32-NEXT:    movl (%eax), %eax
+; X32-NEXT:    leal 20(%ebp), %edx
+; X32-NEXT:    movl (%edx), %edx
+; X32-NEXT:    addl (%ecx), %eax
+; X32-NEXT:    adcl (%esi), %edx
+; X32-NEXT:    popl %esi
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
   %ret = add i64 %arg1, %arg2
   ret i64 %ret
 }
 
 define i32 @test_add_i32(i32 %arg1, i32 %arg2) {
-; ALL-LABEL: test_add_i32:
-; ALL:       # BB#0:
-; ALL-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; ALL-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI<def>
-; ALL-NEXT:    leal (%rsi,%rdi), %eax
-; ALL-NEXT:    retq
+; X64-LABEL: test_add_i32:
+; X64:       # BB#0:
+; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI<def>
+; X64-NEXT:    leal (%rsi,%rdi), %eax
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_add_i32:
+; X32:       # BB#0:
+; X32-NEXT:    leal 4(%esp), %ecx
+; X32-NEXT:    leal 8(%esp), %eax
+; X32-NEXT:    movl (%eax), %eax
+; X32-NEXT:    addl (%ecx), %eax
+; X32-NEXT:    retl
   %ret = add i32 %arg1, %arg2
   ret i32 %ret
 }
 
 define i16 @test_add_i16(i16 %arg1, i16 %arg2) {
-; ALL-LABEL: test_add_i16:
-; ALL:       # BB#0:
-; ALL-NEXT:    # kill: %DI<def> %DI<kill> %RDI<def>
-; ALL-NEXT:    # kill: %SI<def> %SI<kill> %RSI<def>
-; ALL-NEXT:    leal (%rsi,%rdi), %eax
-; ALL-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; ALL-NEXT:    retq
+; X64-LABEL: test_add_i16:
+; X64:       # BB#0:
+; X64-NEXT:    # kill: %DI<def> %DI<kill> %RDI<def>
+; X64-NEXT:    # kill: %SI<def> %SI<kill> %RSI<def>
+; X64-NEXT:    leal (%rsi,%rdi), %eax
+; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_add_i16:
+; X32:       # BB#0:
+; X32-NEXT:    leal 4(%esp), %ecx
+; X32-NEXT:    leal 8(%esp), %eax
+; X32-NEXT:    movzwl (%eax), %eax
+; X32-NEXT:    addw (%ecx), %ax
+; X32-NEXT:    retl
   %ret = add i16 %arg1, %arg2
   ret i16 %ret
 }
 
 define i8 @test_add_i8(i8 %arg1, i8 %arg2) {
-; ALL-LABEL: test_add_i8:
-; ALL:       # BB#0:
-; ALL-NEXT:    addb %dil, %sil
-; ALL-NEXT:    movl %esi, %eax
-; ALL-NEXT:    retq
+; X64-LABEL: test_add_i8:
+; X64:       # BB#0:
+; X64-NEXT:    addb %dil, %sil
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_add_i8:
+; X32:       # BB#0:
+; X32-NEXT:    leal 4(%esp), %ecx
+; X32-NEXT:    leal 8(%esp), %eax
+; X32-NEXT:    movb (%eax), %al
+; X32-NEXT:    addb (%ecx), %al
+; X32-NEXT:    retl
   %ret = add i8 %arg1, %arg2
   ret i8 %ret
 }
diff --git a/test/CodeGen/X86/GlobalISel/legalize-add.mir b/test/CodeGen/X86/GlobalISel/legalize-add.mir
index 22619cc71033..6a03388da947 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-add.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-add.mir
@@ -1,40 +1,67 @@
-# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s
-
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
+# RUN: llc -mtriple=i386-linux-gnu   -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32
 --- |
-  ; ModuleID = '<stdin>'
-  source_filename = "<stdin>"
-  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-  target triple = "x86_64--linux-gnu"
+  define void @test_add_i32() {
+    ret void
+  }
 
-  define i32 @test_add_i32(i32 %arg1, i32 %arg2) {
-    %ret = add i32 %arg1, %arg2
-    ret i32 %ret
+  define void @test_add_i64() {
+    ret void
   }
 
 ...
 ---
 name:            test_add_i32
+# ALL-LABEL: name:  test_add_i32
 alignment:       4
 legalized:       false
 regBankSelected: false
-selected:        false
-tracksRegLiveness: true
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
   - { id: 2, class: _ }
+# ALL:          %0(s32) = IMPLICIT_DEF
+# ALL-NEXT:     %1(s32) = IMPLICIT_DEF
+# ALL-NEXT:     %2(s32) = G_ADD %0, %1
+# ALL-NEXT:     RET 0
 body:             |
   bb.1 (%ir-block.0):
-    liveins: %edi, %esi
-    ;  CHECK-LABEL: name: test_add_i32
-    ;  CHECK: [[VAL1:%.*]](s32) = COPY %edi
-    ;  CHECK: [[VAL2:%.*]](s32) = COPY %esi
-    ;  CHECK: [[RES:%.*]](s32) = G_ADD [[VAL1:%.*]], [[VAL2:%.*]]
-
-    %0(s32) = COPY %edi
-    %1(s32) = COPY %esi
+    %0(s32) = IMPLICIT_DEF
+    %1(s32) = IMPLICIT_DEF
     %2(s32) = G_ADD %0, %1
-    %eax = COPY %2(s32)
-    RET 0, implicit %eax
+    RET 0
+
+...
+---
+name:            test_add_i64
+# ALL-LABEL: name:  test_add_i64
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+# X64:          %0(s64) = IMPLICIT_DEF
+# X64-NEXT:     %1(s64) = IMPLICIT_DEF
+# X64-NEXT:     %2(s64) = G_ADD %0, %1
+# X64-NEXT:     RET 0
+#
+# X32:          %0(s64) = IMPLICIT_DEF
+# X32-NEXT:     %1(s64) = IMPLICIT_DEF
+# X32-NEXT:     %3(s32), %4(s32) = G_UNMERGE_VALUES %0(s64)
+# X32-NEXT:     %5(s32), %6(s32) = G_UNMERGE_VALUES %1(s64)
+# X32-NEXT:     %12(s8) = G_CONSTANT i8 0
+# X32-NEXT:     %7(s1) = G_TRUNC %12(s8)
+# X32-NEXT:     %8(s32), %9(s1) = G_UADDE %3, %5, %7
+# X32-NEXT:     %10(s32), %11(s1) = G_UADDE %4, %6, %9
+# X32-NEXT:     %2(s64) = G_MERGE_VALUES %8(s32), %10(s32)
+# X32-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    %0(s64) = IMPLICIT_DEF
+    %1(s64) = IMPLICIT_DEF
+    %2(s64) = G_ADD %0, %1
+    RET 0
 
 ...
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir b/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir
new file mode 100644
index 000000000000..a115d1fa3255
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir
@@ -0,0 +1,36 @@
+# RUN: llc -mtriple=i386-linux-gnu -global-isel                       -run-pass=regbankselect %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
+# RUN: llc -mtriple=i386-linux-gnu -global-isel -regbankselect-greedy -run-pass=regbankselect %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=GREEDY
+
+--- |
+  define void @test_uadde_i32() {
+    ret void
+  }
+
+...
+---
+name:            test_uadde_i32
+# CHECK-LABEL: name:  test_uadde_i32
+alignment:       4
+legalized:       true
+regBankSelected: false
+# CHECK:      registers:
+# CHECK-NEXT:   - { id: 0, class: gpr }
+# CHECK-NEXT:   - { id: 1, class: gpr }
+# CHECK-NEXT:   - { id: 2, class: gpr }
+# CHECK-NEXT:   - { id: 3, class: gpr }
+# CHECK-NEXT:   - { id: 4, class: gpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+body:             |
+  bb.0 (%ir-block.0):
+    %0(s32) = IMPLICIT_DEF
+    %1(s32) = IMPLICIT_DEF
+    %2(s1) = IMPLICIT_DEF
+    %3(s32), %4(s1) = G_UADDE %0, %1, %2
+    RET 0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/select-add-x32.mir b/test/CodeGen/X86/GlobalISel/select-add-x32.mir
new file mode 100644
index 000000000000..8710aaa61a21
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-add-x32.mir
@@ -0,0 +1,63 @@
+# RUN: llc -mtriple=i386-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=X32
+--- |
+  define i64 @test_add_i64(i64 %a, i64 %b) {
+    %r = add i64 %a, %b
+    ret i64 %r
+  }
+
+...
+---
+name:            test_add_i64
+# X32-LABEL: name:  test_add_i64
+alignment:       4
+legalized:       true
+regBankSelected: true
+# X32:      registers:
+# X32-NEXT:   - { id: 0, class: gr32 }
+# X32-NEXT:   - { id: 1, class: gr32 }
+# X32-NEXT:   - { id: 2, class: gr32 }
+# X32-NEXT:   - { id: 3, class: gr32 }
+# X32-NEXT:   - { id: 4, class: gpr }
+# X32-NEXT:   - { id: 5, class: gr32 }
+# X32-NEXT:   - { id: 6, class: gr32 }
+# X32-NEXT:   - { id: 7, class: gr32 }
+# X32-NEXT:   - { id: 8, class: gr32 }
+# X32-NEXT:   - { id: 9, class: gpr }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+  - { id: 4, class: gpr }
+  - { id: 5, class: gpr }
+  - { id: 6, class: gpr }
+  - { id: 7, class: gpr }
+  - { id: 8, class: gpr }
+  - { id: 9, class: gpr }
+# X32:          %0 = IMPLICIT_DEF
+# X32-NEXT:     %1 = IMPLICIT_DEF
+# X32-NEXT:     %2 = IMPLICIT_DEF
+# X32-NEXT:     %3 = IMPLICIT_DEF
+# X32-NEXT:     %5 = ADD32rr %0, %2, implicit-def %eflags
+# X32-NEXT:     %6 = COPY %eflags
+# X32-NEXT:     %eflags = COPY %6
+# X32-NEXT:     %7 = ADC32rr %1, %3, implicit-def %eflags, implicit %eflags
+# X32-NEXT:     %8 = COPY %eflags
+# X32-NEXT:     %eax = COPY %5
+# X32-NEXT:     %edx = COPY %7
+# X32-NEXT:     RET 0, implicit %eax, implicit %edx
+body:             |
+  bb.0 (%ir-block.0):
+    %0(s32) = IMPLICIT_DEF
+    %1(s32) = IMPLICIT_DEF
+    %2(s32) = IMPLICIT_DEF
+    %3(s32) = IMPLICIT_DEF
+    %9(s8) = G_CONSTANT i8 0
+    %4(s1) = G_TRUNC %9(s8)
+    %5(s32), %6(s1) = G_UADDE %0, %2, %4
+    %7(s32), %8(s1) = G_UADDE %1, %3, %6
+    %eax = COPY %5(s32)
+    %edx = COPY %7(s32)
+    RET 0, implicit %eax, implicit %edx
+
+...
diff --git a/test/CodeGen/X86/arg-copy-elide.ll b/test/CodeGen/X86/arg-copy-elide.ll
index b9a2eeeb7f8f..126f5a1c7976 100644
--- a/test/CodeGen/X86/arg-copy-elide.ll
+++ b/test/CodeGen/X86/arg-copy-elide.ll
@@ -253,9 +253,7 @@ entry:
 ; CHECK: calll _addrof_i32
 ; CHECK: retl
 
-
 ; Don't elide the copy when the alloca is escaped with a store.
-
 define void @escape_with_store(i32 %x) {
   %x1 = alloca i32
   %x2 = alloca i32*
@@ -268,9 +266,8 @@ define void @escape_with_store(i32 %x) {
 }
 
 ; CHECK-LABEL: _escape_with_store:
-; CHECK-DAG: movl {{.*}}(%esp), %[[reg:[^ ]*]]
-; CHECK-DAG: movl $0, [[offs:[0-9]*]](%esp)
-; CHECK: movl %[[reg]], [[offs]](%esp)
+; CHECK: movl {{.*}}(%esp), %[[reg:[^ ]*]]
+; CHECK: movl %[[reg]], [[offs:[0-9]*]](%esp)
 ; CHECK: calll _addrof_i32
 
 
diff --git a/test/CodeGen/X86/leaFixup32.mir b/test/CodeGen/X86/leaFixup32.mir
deleted file mode 100644
index 70aac21c7ff2..000000000000
--- a/test/CodeGen/X86/leaFixup32.mir
+++ /dev/null
@@ -1,508 +0,0 @@
-# RUN: llc -run-pass x86-fixup-LEAs -mcpu=corei7-avx -o - %s | FileCheck %s
---- |
-  ; ModuleID = 'test/CodeGen/X86/fixup-lea.ll'
-  source_filename = "test/CodeGen/X86/fixup-lea.ll"
-  target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
-  target triple = "i386"
-  ;generated using: llc -stop-after x86-pad-short-functions fixup-lea.ll > leaFinxup32.mir
-
-  ;test2add_32: 3 operands LEA32r that can be replaced with 2 add instructions
-  ; where ADD32ri8 is chosen
-  define i32 @test2add_32() {
-    ret i32 0
-  }
-
-  ;test2add_ebp_32: 3 operands LEA32r that can be replaced with 2 add instructions
-  ; where the base is rbp/r13/ebp register
-  define i32 @test2add_ebp_32() {
-    ret i32 0
-  }
-
-  ;test1add_ebp_32: 2 operands LEA32r where base register is ebp and can be replaced
-  ; with an add instruction
-  define i32 @test1add_ebp_32() {
-    ret i32 0
-  }
-
-  ;testleaadd_32: 3 operands LEA32r that can be replaced with 1 lea 1 add instructions
-  define i32 @testleaadd_32() {
-    ret i32 0
-  }
-
-  ;testleaadd_ebp_32: 3 operands LEA32r that can be replaced with 1 lea 1 add instructions
-  ; where the base is ebp register
-  define i32 @testleaadd_ebp_32() {
-    ret i32 0
-  }
-
-  ;test1lea_ebp_32: 2 operands LEA32r wher base register is rbp/r13/ebp and can be replaced
-  ; with a lea instruction
-  define i32 @test1lea_ebp_32() {
-    ret i32 0
-  }
- 
-  ;test2addi32_32: 3 operands LEA32r that can be replaced with 2 add instructions where ADD32ri32
-  ; is chosen
-  define i32 @test2addi32_32() {
-    ret i32 0
-  }
- 
-  ;test1mov1add_ebp_32: 2 operands LEA32r that can be replaced with 1 add 1 mov instructions
-  ; where the base is rbp/r13/ebp register
-  define i32 @test1mov1add_ebp_32() {
-    ret i32 0
-  }
-
-  ;testleaadd_ebp_index_32: 3 operands LEA32r that can be replaced with 1 lea 1 add instructions
-  ; where the base and the index are ebp register and there is offset
-  define i32 @testleaadd_ebp_index_32() {
-    ret i32 0
-  }
-
-  ;testleaadd_ebp_index2_32: 3 operands LEA32r that can be replaced with 1 lea 1 add instructions
-  ; where the base and the index are ebp register and there is scale
-  define i32 @testleaadd_ebp_index2_32() {
-    ret i32 0
-  }
-  
-  ;test_skip_opt_32: 3 operands LEA32r that can not be replaced with 2 instructions
-  define i32 @test_skip_opt_32() {
-    ret i32 0
-  }
-
-  ;test_skip_eflags_32: LEA32r that cannot be replaced since its not safe to clobber eflags
-  define i32 @test_skip_eflags_32() {
-    ret i32 0
-  }
-
-...
----
-name:            test2add_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%eax' }
-  - { reg: '%ebp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %eax, %ebp
-    ; CHECK: %eax = ADD32rr %eax, killed %ebp
-    ; CHECK: %eax = ADD32ri8 %eax, -5
- 
-    %eax = LEA32r killed %eax, 1, killed %ebp, -5, _
-    RETQ %eax
-
-...
----
-name:            test2add_ebp_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%eax' }
-  - { reg: '%ebp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %eax, %ebp
-    ; CHECK: %ebp = ADD32rr %ebp, killed %eax
-    ; CHECK: %ebp = ADD32ri8 %ebp, -5
- 
-    %ebp = LEA32r killed %ebp, 1, killed %eax, -5, _
-    RETQ %ebp
-
-...
----
-name:            test1add_ebp_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%eax' }
-  - { reg: '%ebp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %eax, %ebp
-    ; CHECK: %ebp = ADD32rr %ebp, killed %eax
- 
-    %ebp = LEA32r killed %ebp, 1, killed %eax, 0, _
-    RETQ %ebp
-
-...
----
-name:            testleaadd_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%eax' }
-  - { reg: '%ebp' }
-  - { reg: '%ebx' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %eax, %ebp, %esi
-    ; CHECK: %ebx = LEA32r killed %eax, 1, killed %ebp, 0
-    ; CHECK: %ebx = ADD32ri8 %ebx, -5
- 
-    %ebx = LEA32r killed %eax, 1, killed %ebp, -5, _
-    RETQ %ebx
-
-...
----
-name:            testleaadd_ebp_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%eax' }
-  - { reg: '%ebp' }
-  - { reg: '%ebx' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %eax, %ebp
-    ; CHECK: %ebx = LEA32r killed %eax, 1, killed %ebp, 0, _
-    ; CHECK: %ebx = ADD32ri8  %ebx, -5
- 
-    %ebx = LEA32r killed %ebp, 1, killed %eax, -5, _
-    RETQ %ebx
-
-...
----
-name:            test1lea_ebp_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%eax' }
-  - { reg: '%ebp' }
-  - { reg: '%ebx' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %eax, %ebp
-    ; CHECK: %ebx = LEA32r killed %eax, 1, killed %ebp, 0, _
- 
-    %ebx = LEA32r killed %ebp, 1, killed %eax, 0, _
-    RETQ %ebx
-
-...
----
-name:            test2addi32_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%eax' }
-  - { reg: '%ebp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %eax, %ebp
-    ; CHECK: %eax = ADD32rr %eax, killed %ebp
-    ; CHECK: %eax = ADD32ri %eax, 129
- 
-    %eax = LEA32r killed %eax, 1, killed %ebp, 129, _
-    RETQ %eax
-
-...
----
-name:            test1mov1add_ebp_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%eax' }
-  - { reg: '%ebp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %eax, %ebp, %ebx
-    ; CHECK: %ebx = MOV32rr killed %ebp
-    ; CHECK: %ebx = ADD32rr %ebx, killed %ebp
- 
-    %ebx = LEA32r killed %ebp, 1, killed %ebp, 0, _
-    RETQ %ebx
-
-...
----
-name:            testleaadd_ebp_index_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%ebx' }
-  - { reg: '%ebp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %eax, %ebp, %ebx
-    ; CHECK: %ebx = LEA32r _, 1, killed %ebp, 5, _
-    ; CHECK: %ebx = ADD32rr %ebx, killed %ebp
- 
-    %ebx = LEA32r killed %ebp, 1, killed %ebp, 5, _
-    RETQ %ebx
-
-...
----
-name:            testleaadd_ebp_index2_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%ebx' }
-  - { reg: '%ebp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %eax, %ebp, %ebx
-    ; CHECK: %ebx = LEA32r _, 4, killed %ebp, 5, _
-    ; CHECK: %ebx = ADD32rr %ebx, killed %ebp
- 
-    %ebx = LEA32r killed %ebp, 4, killed %ebp, 5, _
-    RETQ %ebx
-
-...
----
-name:            test_skip_opt_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%ebx' }
-  - { reg: '%ebp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %eax, %ebp, %ebx
-    ; CHECK: %ebp = LEA32r killed %ebp, 4, killed %ebp, 0, _
- 
-    %ebp = LEA32r killed %ebp, 4, killed %ebp, 0, _
-    RETQ %ebp
-
-...
----
-name:            test_skip_eflags_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%ebp' }
-  - { reg: '%eax' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %eax, %ebp, %ebx
-    ; CHECK: %ebx = LEA32r killed %eax, 4, killed %eax, 5, _
-    ; CHECK: %ebp = LEA32r killed %ebx, 4, killed %ebx, 0, _
-    ; CHECK: %ebp = ADD32ri8 %ebp, 5
-   
-    CMP32rr   %eax, killed %ebx, implicit-def %eflags
-    %ebx = LEA32r killed %eax, 4, killed %eax, 5, _
-    JE_1 %bb.1, implicit %eflags
-    RETQ %ebx
-  bb.1:
-    liveins: %eax, %ebp, %ebx
-    %ebp = LEA32r killed %ebx, 4, killed %ebx, 5, _
-    RETQ %ebp
-
-...
-
-
-
diff --git a/test/CodeGen/X86/leaFixup64.mir b/test/CodeGen/X86/leaFixup64.mir
deleted file mode 100644
index 9b0058750598..000000000000
--- a/test/CodeGen/X86/leaFixup64.mir
+++ /dev/null
@@ -1,1041 +0,0 @@
-# RUN: llc -run-pass x86-fixup-LEAs -mcpu=corei7-avx -o - %s | FileCheck %s
---- |
-  ; ModuleID = 'lea-2.ll'
-  source_filename = "lea-2.ll"
-  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-  ;generated using: llc -stop-after x86-pad-short-functions lea-2.ll > leaFinxup64.mir
-
-  ;testleaadd_64_32_1: 3 operands LEA64_32r cannot be replaced with 2 add instructions
-  ; but can be replaced with 1 lea + 1 add
-  define i32 @testleaadd_64_32_1() {
-    ret i32 0
-  }
-
-  ;testleaadd_rbp_64_32_1: 3 operands LEA64_32r cannot be replaced with 2 add instructions
-  ; where the base is rbp/r13/ebp register but it can be replaced with 1 lea + 1 add
-  define i32 @testleaadd_rbp_64_32_1() {
-    ret i32 0
-  }
-
-  ;test1lea_rbp_64_32_1: 2 operands LEA64_32r where base register is rbp/r13/ebp and can not
-  ; be replaced with an add instruction but can be replaced with 1 lea instruction
-  define i32 @test1lea_rbp_64_32_1() {
-    ret i32 0
-  }
-
-  ;test2add_64: 3 operands LEA64r that can be replaced with 2 add instructions
-  define i32 @test2add_64() {
-    ret i32 0
-  }
-
-  ;test2add_rbp_64: 3 operands LEA64r that can be replaced with 2 add instructions
-  ; where the base is rbp/r13/ebp register
-  define i32 @test2add_rbp_64() {
-    ret i32 0
-  }
-
-  ;test1add_rbp_64: 2 operands LEA64r where base register is rbp/r13/ebp and can be replaced
-  ; with an add instruction
-  define i32 @test1add_rbp_64() {
-    ret i32 0
-  }
-
-  ;testleaadd_64_32: 3 operands LEA64_32r that can be replaced with 1 lea 1 add instructions
-  define i32 @testleaadd_64_32() {
-    ret i32 0
-  }
-
-  ;testleaadd_rbp_64_32: 3 operands LEA64_32r that can be replaced with 1 lea 1 add instructions
-  ; where the base is rbp/r13/ebp register
-  define i32 @testleaadd_rbp_64_32() {
-    ret i32 0
-  }
-
-  ;test1lea_rbp_64_32: 2 operands LEA64_32r where base register is rbp/r13/ebp and can be replaced
-  ; with a lea instruction
-  define i32 @test1lea_rbp_64_32() {
-    ret i32 0
-  }
-
-  ;testleaadd_64: 3 operands LEA64r that can be replaced with 1 lea 1 add instructions
-  define i32 @testleaadd_64() {
-    ret i32 0
-  }
-
-  ;testleaadd_rbp_64: 3 operands LEA64r that can be replaced with 1 lea 1 add instructions
-  ; where the base is rbp/r13/ebp register
-  define i32 @testleaadd_rbp_64() {
-    ret i32 0
-  }
-
-  ;test1lea_rbp_64: 2 operands LEA64r wher base register is rbp/r13/ebp and can be replaced
-  ; with a lea instruction
-  define i32 @test1lea_rbp_64() {
-    ret i32 0
-  }
-
-  ;test8: dst = base & scale!=1, can't optimize
-  define i32 @test8() {
-      ret i32 0
-  }
- 
-  ;testleaaddi32_64_32: 3 operands LEA64_32r that can be replaced with 1 lea + 1 add instructions where
-  ; ADD64ri32 is chosen
-  define i32 @testleaaddi32_64_32() {
-    ret i32 0
-  }
- 
-  ;test1mov1add_rbp_64_32: 2 operands LEA64_32r cannot be replaced with 1 add 1 mov instructions
-  ; where the base is rbp/r13/ebp register
-  define i32 @test1mov1add_rbp_64_32() {
-    ret i32 0
-  }
-
-  ;testleaadd_rbp_index_64_32: 3 operands LEA64_32r that cannot replaced with 1 lea 1 add instructions
-  ; where the base and the index are ebp register and there is offset
-  define i32 @testleaadd_rbp_index_64_32() {
-    ret i32 0
-  }
-
-  ;testleaadd_rbp_index2_64_32: 3 operands LEA64_32r that cannot replaced with 1 lea 1 add instructions
-  ; where the base and the index are ebp register and there is scale
-  define i32 @testleaadd_rbp_index2_64_32() {
-    ret i32 0
-  }
- 
-  ;test2addi32_64: 3 operands LEA64r that can be replaced with 2 add instructions where ADD64ri32
-  ; is chosen
-  define i32 @test2addi32_64() {
-    ret i32 0
-  }
- 
-  ;test1mov1add_rbp_64: 2 operands LEA64r that can be replaced with 1 add 1 mov instructions
-  ; where the base is rbp/r13/ebp register
-  define i32 @test1mov1add_rbp_64() {
-    ret i32 0
-  }
-
-  ;testleaadd_rbp_index_64: 3 operands LEA64r that can be replaced with 1 lea 1 add instructions
-  ; where the base and the index are ebp register and there is offset
-  define i32 @testleaadd_rbp_index_64() {
-    ret i32 0
-  }
-
-  ;testleaadd_rbp_index2_64: 3 operands LEA64r that can be replaced with 1 lea 1 add instructions
-  ; where the base and the index are ebp register and there is scale
-  define i32 @testleaadd_rbp_index2_64() {
-    ret i32 0
-  }
-
-  ;test_skip_opt_64: 3 operands LEA64r that can not be replaced with 2 instructions
-  define i32 @test_skip_opt_64() {
-    ret i32 0
-  }
-
-  ;test_skip_eflags_64: LEA64r that cannot be replaced since its not safe to clobber eflags
-  define i32 @test_skip_eflags_64() {
-    ret i32 0
-  }
-
-  ;test_skip_opt_64_32: 3 operands LEA64_32r that can not be replaced with 2 instructions
-  define i32 @test_skip_opt_64_32() {
-    ret i32 0
-  }
-
-  ;test_skip_eflags_64_32: LEA64_32r that cannot be replaced since its not safe to clobber eflags
-  define i32 @test_skip_eflags_64_32() {
-    ret i32 0
-  }
-
-
-...
----
-name:            testleaadd_64_32_1
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %eax = LEA64_32r killed %rax, 1, killed %rbp, 0
-    ; CHECK: %eax = ADD32ri8 %eax, -5
- 
-    %eax = LEA64_32r killed %rax, 1, killed %rbp, -5, _
-    RETQ %eax
-
-...
----
-name:            testleaadd_rbp_64_32_1
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %ebp = LEA64_32r killed %rax, 1,  killed %rbp, 0
-    ; CHECK: %ebp = ADD32ri8 %ebp, -5
- 
-    %ebp = LEA64_32r killed %rbp, 1, killed %rax, -5, _
-    RETQ %ebp
-
-...
----
-name:            test1lea_rbp_64_32_1
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %ebp = LEA64_32r killed %rax, 1, killed %rbp, 0
- 
-    %ebp = LEA64_32r killed %rbp, 1, killed %rax, 0, _
-    RETQ %ebp
-
-...
----
-name:            test2add_64
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %rax = ADD64rr %rax, killed %rbp
-    ; CHECK: %rax = ADD64ri8 %rax, -5
- 
-    %rax = LEA64r killed %rax, 1, killed %rbp, -5, _
-    RETQ %eax
-
-...
----
-name:            test2add_rbp_64
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %rbp = ADD64rr %rbp, killed %rax
-    ; CHECK: %rbp = ADD64ri8 %rbp, -5
- 
-    %rbp = LEA64r killed %rbp, 1, killed %rax, -5, _
-    RETQ %ebp
-
-...
----
-name:            test1add_rbp_64
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %rbp = ADD64rr %rbp, killed %rax
- 
-    %rbp = LEA64r killed %rbp, 1, killed %rax, 0, _
-    RETQ %ebp
-
-...
----
-name:            testleaadd_64_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-  - { reg: '%rbx' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %ebx = LEA64_32r killed %rax, 1, killed %rbp, 0, _
-    ; CHECK: %ebx = ADD32ri8 %ebx, -5
- 
-    %ebx = LEA64_32r killed %rax, 1, killed %rbp, -5, _
-    RETQ %ebx
-
-...
----
-name:            testleaadd_rbp_64_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-  - { reg: '%rbx' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %ebx = LEA64_32r killed %rax, 1, killed %rbp, 0, _
-    ; CHECK: %ebx = ADD32ri8 %ebx, -5
- 
-    %ebx = LEA64_32r killed %rbp, 1, killed %rax, -5, _
-    RETQ %ebx
-
-...
----
-name:            test1lea_rbp_64_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-  - { reg: '%rbx' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %ebx = LEA64_32r killed %rax, 1, killed %rbp, 0, _
- 
-    %ebx = LEA64_32r killed %rbp, 1, killed %rax, 0, _
-    RETQ %ebx
-
-...
----
-name:            testleaadd_64
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-  - { reg: '%rbx' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %rbx = LEA64r killed %rax, 1, killed %rbp, 0, _
-    ; CHECK: %rbx = ADD64ri8 %rbx, -5
- 
-    %rbx = LEA64r killed %rax, 1, killed %rbp, -5, _
-    RETQ %ebx
-
-...
----
-name:            testleaadd_rbp_64
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-  - { reg: '%rbx' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %rbx = LEA64r killed %rax, 1, killed %rbp, 0, _
-    ; CHECK: %rbx = ADD64ri8 %rbx, -5
- 
-    %rbx = LEA64r killed %rbp, 1, killed %rax, -5, _
-    RETQ %ebx
-
-...
----
-name:            test1lea_rbp_64
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-  - { reg: '%rbx' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %rbx = LEA64r killed %rax, 1, killed %rbp, 0, _
- 
-    %rbx = LEA64r killed %rbp, 1, killed %rax, 0, _
-    RETQ %ebx
-
-...
----
-name:            test8
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rdi' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rdi, %rbp
-    ; CHECK:  %r12 = LEA64r _, 2, killed %r13, 5, _
-    ; CHECK:  %r12 = ADD64rr %r12, killed %rbp
-    %rbp = KILL %rbp, implicit-def %rbp
-    %r13 = KILL %rdi, implicit-def %r13
-    %r12 = LEA64r killed %rbp, 2, killed %r13, 5, _
-    RETQ %r12
-
-...
----
-name:            testleaaddi32_64_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %eax = LEA64_32r killed %rax, 1, killed %rbp, 0
-    ; CHECK: %eax = ADD32ri %eax, 129
- 
-    %eax = LEA64_32r killed %rax, 1, killed %rbp, 129, _
-    RETQ %eax
-
-...
----
-name:            test1mov1add_rbp_64_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp, %rbx
-    ; CHECK: %ebx = LEA64_32r killed %rbp, 1, killed %rbp, 0, _
-
-    %ebx = LEA64_32r killed %rbp, 1, killed %rbp, 0, _
-    RETQ %ebx
-
-...
----
-name:            testleaadd_rbp_index_64_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rbx' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp, %rbx
-    ; CHECK: %ebx = LEA64_32r killed %rbp, 1, killed %rbp, 5, _
- 
-    %ebx = LEA64_32r killed %rbp, 1, killed %rbp, 5, _
-    RETQ %ebx
-
-...
----
-name:            testleaadd_rbp_index2_64_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rbx' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %eax, %ebp, %ebx
-    ; CHECK: %ebx = LEA64_32r killed %rbp, 4, killed %rbp, 5, _
- 
-    %ebx = LEA64_32r killed %rbp, 4, killed %rbp, 5, _
-    RETQ %ebx
-
-...
----
-name:            test2addi32_64
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp
-    ; CHECK: %rax = ADD64rr %rax, killed %rbp
-    ; CHECK: %rax = ADD64ri32 %rax, 129
- 
-    %rax = LEA64r killed %rax, 1, killed %rbp, 129, _
-    RETQ %eax
-
-...
----
-name:            test1mov1add_rbp_64
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rax' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp, %rbx
-    ; CHECK: %rbx = MOV64rr killed %rbp
-    ; CHECK: %rbx = ADD64rr %rbx, killed %rbp
- 
-    %rbx = LEA64r killed %rbp, 1, killed %rbp, 0, _
-    RETQ %ebx
-
-...
----
-name:            testleaadd_rbp_index_64
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rbx' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp, %rbx
-    ; CHECK: %rbx = LEA64r _, 1, killed %rbp, 5, _
-    ; CHECK: %rbx = ADD64rr %rbx, killed %rbp
- 
-    %rbx = LEA64r killed %rbp, 1, killed %rbp, 5, _
-    RETQ %ebx
-
-...
----
-name:            testleaadd_rbp_index2_64
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rbx' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp, %rbx
-    ; CHECK: %rbx = LEA64r _, 4, killed %rbp, 5, _
-    ; CHECK: %rbx = ADD64rr %rbx, killed %rbp
- 
-    %rbx = LEA64r killed %rbp, 4, killed %rbp, 5, _
-    RETQ %ebx
-
-...
----
-name:            test_skip_opt_64
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rbx' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp, %rbx
-    ; CHECK: %rbp = LEA64r killed %rbp, 4, killed %rbp, 0, _
- 
-    %rbp = LEA64r killed %rbp, 4, killed %rbp, 0, _
-    RETQ %ebp
-
-...
----
-name:            test_skip_eflags_64
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rbp' }
-  - { reg: '%rax' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp, %rbx
-    ; CHECK: %rbx = LEA64r killed %rax, 4, killed %rax, 5, _
-    ; CHECK: %rbp = LEA64r killed %rbx, 4, killed %rbx, 0, _
-    ; CHECK: %rbp = ADD64ri8 %rbp, 5
-   
-    CMP64rr   %rax, killed %rbx, implicit-def %eflags
-    %rbx = LEA64r killed %rax, 4, killed %rax, 5, _
-    JE_1 %bb.1, implicit %eflags
-    RETQ %ebx
-  bb.1:
-    liveins: %rax, %rbp, %rbx
-    %rbp = LEA64r killed %rbx, 4, killed %rbx, 5, _
-    RETQ %ebp
-
-...
----
-name:            test_skip_opt_64_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rbx' }
-  - { reg: '%rbp' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp, %rbx
-    ; CHECK: %ebp = LEA64_32r killed %rbp, 4, killed %rbp, 0, _
- 
-    %ebp = LEA64_32r killed %rbp, 4, killed %rbp, 0, _
-    RETQ %ebp
-
-...
----
-name:            test_skip_eflags_64_32
-alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
-liveins:         
-  - { reg: '%rbp' }
-  - { reg: '%rax' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    0
-  adjustsStack:    false
-  hasCalls:        false
-  maxCallFrameSize: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-body:             |
-  bb.0 (%ir-block.0):
-    liveins: %rax, %rbp, %rbx
-    ; CHECK: %ebx = LEA64_32r killed %rax, 4, killed %rax, 5, _
-    ; CHECK: %ebp = LEA64_32r killed %rbx, 4, killed %rbx, 0, _
-    ; CHECK: %ebp = ADD32ri8 %ebp, 5
-   
-    CMP64rr   %rax, killed %rbx, implicit-def %eflags
-    %ebx = LEA64_32r killed %rax, 4, killed %rax, 5, _
-    JE_1 %bb.1, implicit %eflags
-    RETQ %ebx
-  bb.1:
-    liveins: %rax, %rbp, %rbx
-    %ebp = LEA64_32r killed %rbx, 4, killed %rbx, 5, _
-    RETQ %ebp
-
-...
-
-
-
diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll
index 33d5caba597c..d49c88724331 100644
--- a/test/CodeGen/X86/nontemporal.ll
+++ b/test/CodeGen/X86/nontemporal.ll
@@ -9,33 +9,29 @@ define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E, <4
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pushl %ebp
 ; X32-SSE-NEXT:    movl %esp, %ebp
-; X32-SSE-NEXT:    pushl %esi
 ; X32-SSE-NEXT:    andl $-16, %esp
 ; X32-SSE-NEXT:    subl $16, %esp
 ; X32-SSE-NEXT:    movl 72(%ebp), %eax
 ; X32-SSE-NEXT:    movl 76(%ebp), %ecx
-; X32-SSE-NEXT:    movl 12(%ebp), %edx
 ; X32-SSE-NEXT:    movdqa 56(%ebp), %xmm3
 ; X32-SSE-NEXT:    movdqa 40(%ebp), %xmm4
 ; X32-SSE-NEXT:    movdqa 24(%ebp), %xmm5
-; X32-SSE-NEXT:    movl 8(%ebp), %esi
-; X32-SSE-NEXT:    addps .LCPI0_0, %xmm0
-; X32-SSE-NEXT:    movntps %xmm0, (%esi)
-; X32-SSE-NEXT:    paddq .LCPI0_1, %xmm2
-; X32-SSE-NEXT:    movntdq %xmm2, (%esi)
-; X32-SSE-NEXT:    addpd .LCPI0_2, %xmm1
-; X32-SSE-NEXT:    movntpd %xmm1, (%esi)
-; X32-SSE-NEXT:    paddd .LCPI0_3, %xmm5
-; X32-SSE-NEXT:    movntdq %xmm5, (%esi)
-; X32-SSE-NEXT:    paddw .LCPI0_4, %xmm4
-; X32-SSE-NEXT:    movntdq %xmm4, (%esi)
-; X32-SSE-NEXT:    paddb .LCPI0_5, %xmm3
-; X32-SSE-NEXT:    movntdq %xmm3, (%esi)
-; X32-SSE-NEXT:    movntil %edx, (%esi)
-; X32-SSE-NEXT:    movntil %ecx, 4(%esi)
-; X32-SSE-NEXT:    movntil %eax, (%esi)
-; X32-SSE-NEXT:    leal -4(%ebp), %esp
-; X32-SSE-NEXT:    popl %esi
+; X32-SSE-NEXT:    movl 8(%ebp), %edx
+; X32-SSE-NEXT:    addps {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT:    movntps %xmm0, (%edx)
+; X32-SSE-NEXT:    paddq {{\.LCPI.*}}, %xmm2
+; X32-SSE-NEXT:    movntdq %xmm2, (%edx)
+; X32-SSE-NEXT:    addpd {{\.LCPI.*}}, %xmm1
+; X32-SSE-NEXT:    movntpd %xmm1, (%edx)
+; X32-SSE-NEXT:    paddd {{\.LCPI.*}}, %xmm5
+; X32-SSE-NEXT:    movntdq %xmm5, (%edx)
+; X32-SSE-NEXT:    paddw {{\.LCPI.*}}, %xmm4
+; X32-SSE-NEXT:    movntdq %xmm4, (%edx)
+; X32-SSE-NEXT:    paddb {{\.LCPI.*}}, %xmm3
+; X32-SSE-NEXT:    movntdq %xmm3, (%edx)
+; X32-SSE-NEXT:    movntil %ecx, 4(%edx)
+; X32-SSE-NEXT:    movntil %eax, (%edx)
+; X32-SSE-NEXT:    movl %ebp, %esp
 ; X32-SSE-NEXT:    popl %ebp
 ; X32-SSE-NEXT:    retl
 ;
@@ -43,33 +39,29 @@ define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E, <4
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    pushl %ebp
 ; X32-AVX-NEXT:    movl %esp, %ebp
-; X32-AVX-NEXT:    pushl %esi
 ; X32-AVX-NEXT:    andl $-16, %esp
 ; X32-AVX-NEXT:    subl $16, %esp
 ; X32-AVX-NEXT:    movl 72(%ebp), %eax
 ; X32-AVX-NEXT:    movl 76(%ebp), %ecx
-; X32-AVX-NEXT:    movl 12(%ebp), %edx
 ; X32-AVX-NEXT:    vmovdqa 56(%ebp), %xmm3
 ; X32-AVX-NEXT:    vmovdqa 40(%ebp), %xmm4
 ; X32-AVX-NEXT:    vmovdqa 24(%ebp), %xmm5
-; X32-AVX-NEXT:    movl 8(%ebp), %esi
-; X32-AVX-NEXT:    vaddps .LCPI0_0, %xmm0, %xmm0
-; X32-AVX-NEXT:    vmovntps %xmm0, (%esi)
-; X32-AVX-NEXT:    vpaddq .LCPI0_1, %xmm2, %xmm0
-; X32-AVX-NEXT:    vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT:    vaddpd .LCPI0_2, %xmm1, %xmm0
-; X32-AVX-NEXT:    vmovntpd %xmm0, (%esi)
-; X32-AVX-NEXT:    vpaddd .LCPI0_3, %xmm5, %xmm0
-; X32-AVX-NEXT:    vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT:    vpaddw .LCPI0_4, %xmm4, %xmm0
-; X32-AVX-NEXT:    vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT:    vpaddb .LCPI0_5, %xmm3, %xmm0
-; X32-AVX-NEXT:    vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT:    movntil %edx, (%esi)
-; X32-AVX-NEXT:    movntil %ecx, 4(%esi)
-; X32-AVX-NEXT:    movntil %eax, (%esi)
-; X32-AVX-NEXT:    leal -4(%ebp), %esp
-; X32-AVX-NEXT:    popl %esi
+; X32-AVX-NEXT:    movl 8(%ebp), %edx
+; X32-AVX-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-AVX-NEXT:    vmovntps %xmm0, (%edx)
+; X32-AVX-NEXT:    vpaddq {{\.LCPI.*}}, %xmm2, %xmm0
+; X32-AVX-NEXT:    vmovntdq %xmm0, (%edx)
+; X32-AVX-NEXT:    vaddpd {{\.LCPI.*}}, %xmm1, %xmm0
+; X32-AVX-NEXT:    vmovntpd %xmm0, (%edx)
+; X32-AVX-NEXT:    vpaddd {{\.LCPI.*}}, %xmm5, %xmm0
+; X32-AVX-NEXT:    vmovntdq %xmm0, (%edx)
+; X32-AVX-NEXT:    vpaddw {{\.LCPI.*}}, %xmm4, %xmm0
+; X32-AVX-NEXT:    vmovntdq %xmm0, (%edx)
+; X32-AVX-NEXT:    vpaddb {{\.LCPI.*}}, %xmm3, %xmm0
+; X32-AVX-NEXT:    vmovntdq %xmm0, (%edx)
+; X32-AVX-NEXT:    movntil %ecx, 4(%edx)
+; X32-AVX-NEXT:    movntil %eax, (%edx)
+; X32-AVX-NEXT:    movl %ebp, %esp
 ; X32-AVX-NEXT:    popl %ebp
 ; X32-AVX-NEXT:    retl
 ;
diff --git a/test/CodeGen/X86/psubus.ll b/test/CodeGen/X86/psubus.ll
index 35f96eda35e1..a1f1e084d330 100644
--- a/test/CodeGen/X86/psubus.ll
+++ b/test/CodeGen/X86/psubus.ll
@@ -1,219 +1,169 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
 ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
 ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
 
-define void @test1(i16* nocapture %head) nounwind {
+define <8 x i16> @test1(<8 x i16> %x) nounwind {
 ; SSE-LABEL: test1:
 ; SSE:       ## BB#0: ## %vector.ph
-; SSE-NEXT:    movdqu (%rdi), %xmm0
 ; SSE-NEXT:    psubusw {{.*}}(%rip), %xmm0
-; SSE-NEXT:    movdqu %xmm0, (%rdi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test1:
 ; AVX:       ## BB#0: ## %vector.ph
-; AVX-NEXT:    vmovdqu (%rdi), %xmm0
 ; AVX-NEXT:    vpsubusw {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
 ; AVX-NEXT:    retq
 vector.ph:
-  %0 = getelementptr inbounds i16, i16* %head, i64 0
-  %1 = bitcast i16* %0 to <8 x i16>*
-  %2 = load <8 x i16>, <8 x i16>* %1, align 2
-  %3 = icmp slt <8 x i16> %2, zeroinitializer
-  %4 = xor <8 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
-  %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
-  store <8 x i16> %5, <8 x i16>* %1, align 2
-  ret void
+  %0 = icmp slt <8 x i16> %x, zeroinitializer
+  %1 = xor <8 x i16> %x, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+  %res = select <8 x i1> %0, <8 x i16> %1, <8 x i16> zeroinitializer
+  ret <8 x i16> %res
 }
 
-define void @test2(i16* nocapture %head) nounwind {
+define <8 x i16> @test2(<8 x i16> %x) nounwind {
 ; SSE-LABEL: test2:
 ; SSE:       ## BB#0: ## %vector.ph
-; SSE-NEXT:    movdqu (%rdi), %xmm0
 ; SSE-NEXT:    psubusw {{.*}}(%rip), %xmm0
-; SSE-NEXT:    movdqu %xmm0, (%rdi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test2:
 ; AVX:       ## BB#0: ## %vector.ph
-; AVX-NEXT:    vmovdqu (%rdi), %xmm0
 ; AVX-NEXT:    vpsubusw {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
 ; AVX-NEXT:    retq
 vector.ph:
-  %0 = getelementptr inbounds i16, i16* %head, i64 0
-  %1 = bitcast i16* %0 to <8 x i16>*
-  %2 = load <8 x i16>, <8 x i16>* %1, align 2
-  %3 = icmp ugt <8 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
-  %4 = add <8 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
-  %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
-  store <8 x i16> %5, <8 x i16>* %1, align 2
-  ret void
+  %0 = icmp ugt <8 x i16> %x, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
+  %1 = add <8 x i16> %x, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
+  %res = select <8 x i1> %0, <8 x i16> %1, <8 x i16> zeroinitializer
+  ret <8 x i16> %res
 }
 
-define void @test3(i16* nocapture %head, i16 zeroext %w) nounwind {
+define <8 x i16> @test3(<8 x i16> %x, i16 zeroext %w) nounwind {
 ; SSE-LABEL: test3:
 ; SSE:       ## BB#0: ## %vector.ph
-; SSE-NEXT:    movd %esi, %xmm0
-; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; SSE-NEXT:    movdqu (%rdi), %xmm1
-; SSE-NEXT:    psubusw %xmm0, %xmm1
-; SSE-NEXT:    movdqu %xmm1, (%rdi)
+; SSE-NEXT:    movd %edi, %xmm1
+; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE-NEXT:    psubusw %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: test3:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovd %esi, %xmm0
-; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX1-NEXT:    vmovdqu (%rdi), %xmm1
-; AVX1-NEXT:    vpsubusw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT:    vmovd %edi, %xmm1
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test3:
 ; AVX2:       ## BB#0: ## %vector.ph
-; AVX2-NEXT:    vmovd %esi, %xmm0
-; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqu (%rdi), %xmm1
-; AVX2-NEXT:    vpsubusw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vmovdqu %xmm0, (%rdi)
+; AVX2-NEXT:    vmovd %edi, %xmm1
+; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
+; AVX2-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 vector.ph:
   %0 = insertelement <8 x i16> undef, i16 %w, i32 0
   %broadcast15 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
-  %1 = getelementptr inbounds i16, i16* %head, i64 0
-  %2 = bitcast i16* %1 to <8 x i16>*
-  %3 = load <8 x i16>, <8 x i16>* %2, align 2
-  %4 = icmp ult <8 x i16> %3, %broadcast15
-  %5 = sub <8 x i16> %3, %broadcast15
-  %6 = select <8 x i1> %4, <8 x i16> zeroinitializer, <8 x i16> %5
-  store <8 x i16> %6, <8 x i16>* %2, align 2
-  ret void
+  %1 = icmp ult <8 x i16> %x, %broadcast15
+  %2 = sub <8 x i16> %x, %broadcast15
+  %res = select <8 x i1> %1, <8 x i16> zeroinitializer, <8 x i16> %2
+  ret <8 x i16> %res
 }
 
-define void @test4(i8* nocapture %head) nounwind {
+define <16 x i8> @test4(<16 x i8> %x) nounwind {
 ; SSE-LABEL: test4:
 ; SSE:       ## BB#0: ## %vector.ph
-; SSE-NEXT:    movdqu (%rdi), %xmm0
 ; SSE-NEXT:    psubusb {{.*}}(%rip), %xmm0
-; SSE-NEXT:    movdqu %xmm0, (%rdi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test4:
 ; AVX:       ## BB#0: ## %vector.ph
-; AVX-NEXT:    vmovdqu (%rdi), %xmm0
 ; AVX-NEXT:    vpsubusb {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
 ; AVX-NEXT:    retq
 vector.ph:
-  %0 = getelementptr inbounds i8, i8* %head, i64 0
-  %1 = bitcast i8* %0 to <16 x i8>*
-  %2 = load <16 x i8>, <16 x i8>* %1, align 1
-  %3 = icmp slt <16 x i8> %2, zeroinitializer
-  %4 = xor <16 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
-  %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
-  store <16 x i8> %5, <16 x i8>* %1, align 1
-  ret void
+  %0 = icmp slt <16 x i8> %x, zeroinitializer
+  %1 = xor <16 x i8> %x, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+  %res = select <16 x i1> %0, <16 x i8> %1, <16 x i8> zeroinitializer
+  ret <16 x i8> %res
 }
 
-define void @test5(i8* nocapture %head) nounwind {
+define <16 x i8> @test5(<16 x i8> %x) nounwind {
 ; SSE-LABEL: test5:
 ; SSE:       ## BB#0: ## %vector.ph
-; SSE-NEXT:    movdqu (%rdi), %xmm0
 ; SSE-NEXT:    psubusb {{.*}}(%rip), %xmm0
-; SSE-NEXT:    movdqu %xmm0, (%rdi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test5:
 ; AVX:       ## BB#0: ## %vector.ph
-; AVX-NEXT:    vmovdqu (%rdi), %xmm0
 ; AVX-NEXT:    vpsubusb {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
 ; AVX-NEXT:    retq
 vector.ph:
-  %0 = getelementptr inbounds i8, i8* %head, i64 0
-  %1 = bitcast i8* %0 to <16 x i8>*
-  %2 = load <16 x i8>, <16 x i8>* %1, align 1
-  %3 = icmp ugt <16 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
-  %4 = add <16 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
-  %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
-  store <16 x i8> %5, <16 x i8>* %1, align 1
-  ret void
+  %0 = icmp ugt <16 x i8> %x, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
+  %1 = add <16 x i8> %x, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
+  %res = select <16 x i1> %0, <16 x i8> %1, <16 x i8> zeroinitializer
+  ret <16 x i8> %res
 }
 
-define void @test6(i8* nocapture %head, i8 zeroext %w) nounwind {
+define <16 x i8> @test6(<16 x i8> %x, i8 zeroext %w) nounwind {
 ; SSE2-LABEL: test6:
 ; SSE2:       ## BB#0: ## %vector.ph
-; SSE2-NEXT:    movd %esi, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; SSE2-NEXT:    movdqu (%rdi), %xmm1
-; SSE2-NEXT:    psubusb %xmm0, %xmm1
-; SSE2-NEXT:    movdqu %xmm1, (%rdi)
+; SSE2-NEXT:    movd %edi, %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE2-NEXT:    psubusb %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test6:
 ; SSSE3:       ## BB#0: ## %vector.ph
-; SSSE3-NEXT:    movd %esi, %xmm0
-; SSSE3-NEXT:    pxor %xmm1, %xmm1
-; SSSE3-NEXT:    pshufb %xmm1, %xmm0
-; SSSE3-NEXT:    movdqu (%rdi), %xmm1
-; SSSE3-NEXT:    psubusb %xmm0, %xmm1
-; SSSE3-NEXT:    movdqu %xmm1, (%rdi)
+; SSSE3-NEXT:    movd %edi, %xmm1
+; SSSE3-NEXT:    pxor %xmm2, %xmm2
+; SSSE3-NEXT:    pshufb %xmm2, %xmm1
+; SSSE3-NEXT:    psubusb %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
+; SSE41-LABEL: test6:
+; SSE41:       ## BB#0: ## %vector.ph
+; SSE41-NEXT:    movd %edi, %xmm1
+; SSE41-NEXT:    pxor %xmm2, %xmm2
+; SSE41-NEXT:    pshufb %xmm2, %xmm1
+; SSE41-NEXT:    psubusb %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
 ; AVX1-LABEL: test6:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovd %esi, %xmm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqu (%rdi), %xmm1
-; AVX1-NEXT:    vpsubusb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT:    vmovd %edi, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test6:
 ; AVX2:       ## BB#0: ## %vector.ph
-; AVX2-NEXT:    vmovd %esi, %xmm0
-; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqu (%rdi), %xmm1
-; AVX2-NEXT:    vpsubusb %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vmovdqu %xmm0, (%rdi)
+; AVX2-NEXT:    vmovd %edi, %xmm1
+; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
+; AVX2-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 vector.ph:
   %0 = insertelement <16 x i8> undef, i8 %w, i32 0
   %broadcast15 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
-  %1 = getelementptr inbounds i8, i8* %head, i64 0
-  %2 = bitcast i8* %1 to <16 x i8>*
-  %3 = load <16 x i8>, <16 x i8>* %2, align 1
-  %4 = icmp ult <16 x i8> %3, %broadcast15
-  %5 = sub <16 x i8> %3, %broadcast15
-  %6 = select <16 x i1> %4, <16 x i8> zeroinitializer, <16 x i8> %5
-  store <16 x i8> %6, <16 x i8>* %2, align 1
-  ret void
+  %1 = icmp ult <16 x i8> %x, %broadcast15
+  %2 = sub <16 x i8> %x, %broadcast15
+  %res = select <16 x i1> %1, <16 x i8> zeroinitializer, <16 x i8> %2
+  ret <16 x i8> %res
 }
 
-define void @test7(i16* nocapture %head) nounwind {
+define <16 x i16> @test7(<16 x i16> %x) nounwind {
 ; SSE-LABEL: test7:
 ; SSE:       ## BB#0: ## %vector.ph
-; SSE-NEXT:    movdqu (%rdi), %xmm0
-; SSE-NEXT:    movdqu 16(%rdi), %xmm1
 ; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
 ; SSE-NEXT:    psubusw %xmm2, %xmm0
 ; SSE-NEXT:    psubusw %xmm2, %xmm1
-; SSE-NEXT:    movdqu %xmm1, 16(%rdi)
-; SSE-NEXT:    movdqu %xmm0, (%rdi)
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: test7:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm2, %xmm1
@@ -221,43 +171,29 @@ define void @test7(i16* nocapture %head) nounwind {
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX1-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test7:
 ; AVX2:       ## BB#0: ## %vector.ph
-; AVX2-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX2-NEXT:    vpsubusw {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 vector.ph:
-  %0 = getelementptr inbounds i16, i16* %head, i64 0
-  %1 = bitcast i16* %0 to <16 x i16>*
-  %2 = load <16 x i16>, <16 x i16>* %1, align 2
-  %3 = icmp slt <16 x i16> %2, zeroinitializer
-  %4 = xor <16 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
-  %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
-  store <16 x i16> %5, <16 x i16>* %1, align 2
-  ret void
+  %0 = icmp slt <16 x i16> %x, zeroinitializer
+  %1 = xor <16 x i16> %x, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+  %res = select <16 x i1> %0, <16 x i16> %1, <16 x i16> zeroinitializer
+  ret <16 x i16> %res
 }
 
-define void @test8(i16* nocapture %head) nounwind {
+define <16 x i16> @test8(<16 x i16> %x) nounwind {
 ; SSE-LABEL: test8:
 ; SSE:       ## BB#0: ## %vector.ph
-; SSE-NEXT:    movdqu (%rdi), %xmm0
-; SSE-NEXT:    movdqu 16(%rdi), %xmm1
 ; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767]
 ; SSE-NEXT:    psubusw %xmm2, %xmm0
 ; SSE-NEXT:    psubusw %xmm2, %xmm1
-; SSE-NEXT:    movdqu %xmm1, 16(%rdi)
-; SSE-NEXT:    movdqu %xmm0, (%rdi)
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: test8:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
 ; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
@@ -271,48 +207,33 @@ define void @test8(i16* nocapture %head) nounwind {
 ; AVX1-NEXT:    vpaddw %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test8:
 ; AVX2:       ## BB#0: ## %vector.ph
-; AVX2-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX2-NEXT:    vpsubusw {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 vector.ph:
-  %0 = getelementptr inbounds i16, i16* %head, i64 0
-  %1 = bitcast i16* %0 to <16 x i16>*
-  %2 = load <16 x i16>, <16 x i16>* %1, align 2
-  %3 = icmp ugt <16 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
-  %4 = add <16 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
-  %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
-  store <16 x i16> %5, <16 x i16>* %1, align 2
-  ret void
-
+  %0 = icmp ugt <16 x i16> %x, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
+  %1 = add <16 x i16> %x, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
+  %res = select <16 x i1> %0, <16 x i16> %1, <16 x i16> zeroinitializer
+  ret <16 x i16> %res
 }
 
-define void @test9(i16* nocapture %head, i16 zeroext %w) nounwind {
+define <16 x i16> @test9(<16 x i16> %x, i16 zeroext %w) nounwind {
 ; SSE-LABEL: test9:
 ; SSE:       ## BB#0: ## %vector.ph
-; SSE-NEXT:    movd %esi, %xmm0
-; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; SSE-NEXT:    movdqu (%rdi), %xmm1
-; SSE-NEXT:    movdqu 16(%rdi), %xmm2
-; SSE-NEXT:    psubusw %xmm0, %xmm1
-; SSE-NEXT:    psubusw %xmm0, %xmm2
-; SSE-NEXT:    movdqu %xmm2, 16(%rdi)
-; SSE-NEXT:    movdqu %xmm1, (%rdi)
+; SSE-NEXT:    movd %edi, %xmm2
+; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-NEXT:    psubusw %xmm2, %xmm0
+; SSE-NEXT:    psubusw %xmm2, %xmm1
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: test9:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vmovd %esi, %xmm2
+; AVX1-NEXT:    vmovd %edi, %xmm2
 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
 ; AVX1-NEXT:    vpsubw %xmm2, %xmm1, %xmm3
@@ -324,47 +245,33 @@ define void @test9(i16* nocapture %head, i16 zeroext %w) nounwind {
 ; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vandps %ymm3, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test9:
 ; AVX2:       ## BB#0: ## %vector.ph
-; AVX2-NEXT:    vmovd %esi, %xmm0
-; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
-; AVX2-NEXT:    vmovdqu (%rdi), %ymm1
-; AVX2-NEXT:    vpsubusw %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    vmovd %edi, %xmm1
+; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
+; AVX2-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 vector.ph:
   %0 = insertelement <16 x i16> undef, i16 %w, i32 0
   %broadcast15 = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> zeroinitializer
-  %1 = getelementptr inbounds i16, i16* %head, i64 0
-  %2 = bitcast i16* %1 to <16 x i16>*
-  %3 = load <16 x i16>, <16 x i16>* %2, align 2
-  %4 = icmp ult <16 x i16> %3, %broadcast15
-  %5 = sub <16 x i16> %3, %broadcast15
-  %6 = select <16 x i1> %4, <16 x i16> zeroinitializer, <16 x i16> %5
-  store <16 x i16> %6, <16 x i16>* %2, align 2
-  ret void
+  %1 = icmp ult <16 x i16> %x, %broadcast15
+  %2 = sub <16 x i16> %x, %broadcast15
+  %res = select <16 x i1> %1, <16 x i16> zeroinitializer, <16 x i16> %2
+  ret <16 x i16> %res
 }
 
-define void @test10(i8* nocapture %head) nounwind {
+define <32 x i8> @test10(<32 x i8> %x) nounwind {
 ; SSE-LABEL: test10:
 ; SSE:       ## BB#0: ## %vector.ph
-; SSE-NEXT:    movdqu (%rdi), %xmm0
-; SSE-NEXT:    movdqu 16(%rdi), %xmm1
 ; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
 ; SSE-NEXT:    psubusb %xmm2, %xmm0
 ; SSE-NEXT:    psubusb %xmm2, %xmm1
-; SSE-NEXT:    movdqu %xmm1, 16(%rdi)
-; SSE-NEXT:    movdqu %xmm0, (%rdi)
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: test10:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
@@ -372,44 +279,29 @@ define void @test10(i8* nocapture %head) nounwind {
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX1-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test10:
 ; AVX2:       ## BB#0: ## %vector.ph
-; AVX2-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX2-NEXT:    vpsubusb {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 vector.ph:
-  %0 = getelementptr inbounds i8, i8* %head, i64 0
-  %1 = bitcast i8* %0 to <32 x i8>*
-  %2 = load <32 x i8>, <32 x i8>* %1, align 1
-  %3 = icmp slt <32 x i8> %2, zeroinitializer
-  %4 = xor <32 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
-  %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
-  store <32 x i8> %5, <32 x i8>* %1, align 1
-  ret void
-
+  %0 = icmp slt <32 x i8> %x, zeroinitializer
+  %1 = xor <32 x i8> %x, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+  %res = select <32 x i1> %0, <32 x i8> %1, <32 x i8> zeroinitializer
+  ret <32 x i8> %res
 }
 
-define void @test11(i8* nocapture %head) nounwind {
+define <32 x i8> @test11(<32 x i8> %x) nounwind {
 ; SSE-LABEL: test11:
 ; SSE:       ## BB#0: ## %vector.ph
-; SSE-NEXT:    movdqu (%rdi), %xmm0
-; SSE-NEXT:    movdqu 16(%rdi), %xmm1
 ; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
 ; SSE-NEXT:    psubusb %xmm2, %xmm0
 ; SSE-NEXT:    psubusb %xmm2, %xmm1
-; SSE-NEXT:    movdqu %xmm1, 16(%rdi)
-; SSE-NEXT:    movdqu %xmm0, (%rdi)
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: test11:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
 ; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
@@ -423,60 +315,51 @@ define void @test11(i8* nocapture %head) nounwind {
 ; AVX1-NEXT:    vpaddb %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test11:
 ; AVX2:       ## BB#0: ## %vector.ph
-; AVX2-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX2-NEXT:    vpsubusb {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 vector.ph:
-  %0 = getelementptr inbounds i8, i8* %head, i64 0
-  %1 = bitcast i8* %0 to <32 x i8>*
-  %2 = load <32 x i8>, <32 x i8>* %1, align 1
-  %3 = icmp ugt <32 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
-  %4 = add <32 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
-  %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
-  store <32 x i8> %5, <32 x i8>* %1, align 1
-  ret void
+  %0 = icmp ugt <32 x i8> %x, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
+  %1 = add <32 x i8> %x, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
+  %res = select <32 x i1> %0, <32 x i8> %1, <32 x i8> zeroinitializer
+  ret <32 x i8> %res
 }
 
-define void @test12(i8* nocapture %head, i8 zeroext %w) nounwind {
+define <32 x i8> @test12(<32 x i8> %x, i8 zeroext %w) nounwind {
 ; SSE2-LABEL: test12:
 ; SSE2:       ## BB#0: ## %vector.ph
-; SSE2-NEXT:    movd %esi, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; SSE2-NEXT:    movdqu (%rdi), %xmm1
-; SSE2-NEXT:    movdqu 16(%rdi), %xmm2
-; SSE2-NEXT:    psubusb %xmm0, %xmm1
-; SSE2-NEXT:    psubusb %xmm0, %xmm2
-; SSE2-NEXT:    movdqu %xmm2, 16(%rdi)
-; SSE2-NEXT:    movdqu %xmm1, (%rdi)
+; SSE2-NEXT:    movd %edi, %xmm2
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE2-NEXT:    psubusb %xmm2, %xmm0
+; SSE2-NEXT:    psubusb %xmm2, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test12:
 ; SSSE3:       ## BB#0: ## %vector.ph
-; SSSE3-NEXT:    movd %esi, %xmm0
-; SSSE3-NEXT:    pxor %xmm1, %xmm1
-; SSSE3-NEXT:    pshufb %xmm1, %xmm0
-; SSSE3-NEXT:    movdqu (%rdi), %xmm1
-; SSSE3-NEXT:    movdqu 16(%rdi), %xmm2
-; SSSE3-NEXT:    psubusb %xmm0, %xmm1
-; SSSE3-NEXT:    psubusb %xmm0, %xmm2
-; SSSE3-NEXT:    movdqu %xmm2, 16(%rdi)
-; SSSE3-NEXT:    movdqu %xmm1, (%rdi)
+; SSSE3-NEXT:    movd %edi, %xmm2
+; SSSE3-NEXT:    pxor %xmm3, %xmm3
+; SSSE3-NEXT:    pshufb %xmm3, %xmm2
+; SSSE3-NEXT:    psubusb %xmm2, %xmm0
+; SSSE3-NEXT:    psubusb %xmm2, %xmm1
 ; SSSE3-NEXT:    retq
 ;
+; SSE41-LABEL: test12:
+; SSE41:       ## BB#0: ## %vector.ph
+; SSE41-NEXT:    movd %edi, %xmm2
+; SSE41-NEXT:    pxor %xmm3, %xmm3
+; SSE41-NEXT:    pshufb %xmm3, %xmm2
+; SSE41-NEXT:    psubusb %xmm2, %xmm0
+; SSE41-NEXT:    psubusb %xmm2, %xmm1
+; SSE41-NEXT:    retq
+;
 ; AVX1-LABEL: test12:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovdqu (%rdi), %ymm0
-; AVX1-NEXT:    vmovd %esi, %xmm1
+; AVX1-NEXT:    vmovd %edi, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
@@ -489,617 +372,675 @@ define void @test12(i8* nocapture %head, i8 zeroext %w) nounwind {
 ; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    vandps %ymm3, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test12:
 ; AVX2:       ## BB#0: ## %vector.ph
-; AVX2-NEXT:    vmovd %esi, %xmm0
-; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT:    vmovdqu (%rdi), %ymm1
-; AVX2-NEXT:    vpsubusb %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    vmovd %edi, %xmm1
+; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX2-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 vector.ph:
   %0 = insertelement <32 x i8> undef, i8 %w, i32 0
   %broadcast15 = shufflevector <32 x i8> %0, <32 x i8> undef, <32 x i32> zeroinitializer
-  %1 = getelementptr inbounds i8, i8* %head, i64 0
-  %2 = bitcast i8* %1 to <32 x i8>*
-  %3 = load <32 x i8>, <32 x i8>* %2, align 1
-  %4 = icmp ult <32 x i8> %3, %broadcast15
-  %5 = sub <32 x i8> %3, %broadcast15
-  %6 = select <32 x i1> %4, <32 x i8> zeroinitializer, <32 x i8> %5
-  store <32 x i8> %6, <32 x i8>* %2, align 1
-  ret void
+  %1 = icmp ult <32 x i8> %x, %broadcast15
+  %2 = sub <32 x i8> %x, %broadcast15
+  %res = select <32 x i1> %1, <32 x i8> zeroinitializer, <32 x i8> %2
+  ret <32 x i8> %res
 }
 
-define void @test13(i16* nocapture %head, i32* nocapture %w) nounwind {
+define <8 x i16> @test13(<8 x i16> %x, <8 x i32> %y) nounwind {
 ; SSE2-LABEL: test13:
 ; SSE2:       ## BB#0: ## %vector.ph
-; SSE2-NEXT:    movdqu (%rdi), %xmm0
-; SSE2-NEXT:    movdqu (%rsi), %xmm2
-; SSE2-NEXT:    movdqu 16(%rsi), %xmm3
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm5
-; SSE2-NEXT:    psubd %xmm3, %xmm0
-; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    psubd %xmm2, %xmm0
+; SSE2-NEXT:    pxor %xmm4, %xmm2
 ; SSE2-NEXT:    pxor %xmm4, %xmm5
-; SSE2-NEXT:    pcmpgtd %xmm5, %xmm3
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
-; SSE2-NEXT:    movdqa %xmm2, %xmm5
-; SSE2-NEXT:    pxor %xmm4, %xmm5
-; SSE2-NEXT:    pxor %xmm1, %xmm4
-; SSE2-NEXT:    pcmpgtd %xmm4, %xmm5
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm5[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm3[0]
-; SSE2-NEXT:    psubd %xmm2, %xmm1
+; SSE2-NEXT:    pcmpgtd %xmm5, %xmm2
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[0,2,2,3]
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    pxor %xmm3, %xmm4
+; SSE2-NEXT:    pcmpgtd %xmm4, %xmm2
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
+; SSE2-NEXT:    psubd %xmm1, %xmm3
 ; SSE2-NEXT:    pslld $16, %xmm0
 ; SSE2-NEXT:    psrad $16, %xmm0
-; SSE2-NEXT:    pslld $16, %xmm1
-; SSE2-NEXT:    psrad $16, %xmm1
-; SSE2-NEXT:    packssdw %xmm0, %xmm1
-; SSE2-NEXT:    pandn %xmm1, %xmm4
-; SSE2-NEXT:    movdqu %xmm4, (%rdi)
+; SSE2-NEXT:    pslld $16, %xmm3
+; SSE2-NEXT:    psrad $16, %xmm3
+; SSE2-NEXT:    packssdw %xmm0, %xmm3
+; SSE2-NEXT:    pandn %xmm3, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test13:
 ; SSSE3:       ## BB#0: ## %vector.ph
-; SSSE3-NEXT:    movdqu (%rdi), %xmm0
-; SSSE3-NEXT:    movdqu (%rsi), %xmm2
-; SSSE3-NEXT:    movdqu 16(%rsi), %xmm3
 ; SSSE3-NEXT:    pxor %xmm4, %xmm4
-; SSSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSSE3-NEXT:    movdqa %xmm0, %xmm3
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
 ; SSSE3-NEXT:    movdqa %xmm0, %xmm5
-; SSSE3-NEXT:    psubd %xmm3, %xmm0
-; SSSE3-NEXT:    pxor %xmm4, %xmm3
-; SSSE3-NEXT:    pxor %xmm4, %xmm5
-; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm3
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSSE3-NEXT:    pshufb %xmm5, %xmm3
+; SSSE3-NEXT:    psubd %xmm2, %xmm0
 ; SSSE3-NEXT:    movdqa %xmm2, %xmm6
 ; SSSE3-NEXT:    pxor %xmm4, %xmm6
-; SSSE3-NEXT:    pxor %xmm1, %xmm4
-; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm6
+; SSSE3-NEXT:    pxor %xmm4, %xmm5
+; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm6
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
 ; SSSE3-NEXT:    pshufb %xmm5, %xmm6
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm3[0]
-; SSSE3-NEXT:    psubd %xmm2, %xmm1
+; SSSE3-NEXT:    movdqa %xmm1, %xmm2
+; SSSE3-NEXT:    pxor %xmm4, %xmm2
+; SSSE3-NEXT:    pxor %xmm3, %xmm4
+; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm2
+; SSSE3-NEXT:    pshufb %xmm5, %xmm2
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm6[0]
+; SSSE3-NEXT:    psubd %xmm1, %xmm3
 ; SSSE3-NEXT:    pshufb %xmm5, %xmm0
-; SSSE3-NEXT:    pshufb %xmm5, %xmm1
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSSE3-NEXT:    pandn %xmm1, %xmm6
-; SSSE3-NEXT:    movdqu %xmm6, (%rdi)
+; SSSE3-NEXT:    pshufb %xmm5, %xmm3
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSSE3-NEXT:    pandn %xmm3, %xmm2
+; SSSE3-NEXT:    movdqa %xmm2, %xmm0
 ; SSSE3-NEXT:    retq
 ;
+; SSE41-LABEL: test13:
+; SSE41:       ## BB#0: ## %vector.ph
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT:    movdqa %xmm3, %xmm6
+; SSE41-NEXT:    psubd %xmm1, %xmm3
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    pxor %xmm5, %xmm0
+; SSE41-NEXT:    pxor %xmm5, %xmm6
+; SSE41-NEXT:    pcmpgtd %xmm6, %xmm0
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT:    pshufb %xmm1, %xmm0
+; SSE41-NEXT:    movdqa %xmm2, %xmm6
+; SSE41-NEXT:    pxor %xmm5, %xmm6
+; SSE41-NEXT:    pxor %xmm4, %xmm5
+; SSE41-NEXT:    pcmpgtd %xmm5, %xmm6
+; SSE41-NEXT:    pshufb %xmm1, %xmm6
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
+; SSE41-NEXT:    psubd %xmm2, %xmm4
+; SSE41-NEXT:    pshufb %xmm1, %xmm3
+; SSE41-NEXT:    pshufb %xmm1, %xmm4
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; SSE41-NEXT:    pandn %xmm3, %xmm0
+; SSE41-NEXT:    retq
+;
 ; AVX1-LABEL: test13:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovdqu (%rsi), %ymm0
-; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
-; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm5
+; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpxor %xmm3, %xmm5, %xmm6
-; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm6
-; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
-; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm3, %xmm3
-; AVX1-NEXT:    vpacksswb %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpsubd %xmm5, %xmm2, %xmm1
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm6, %xmm3
+; AVX1-NEXT:    vpacksswb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpsubd %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1-NEXT:    vpandn %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test13:
 ; AVX2:       ## BB#0: ## %vector.ph
-; AVX2-NEXT:    vmovdqu (%rsi), %ymm0
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
+; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm3
+; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm2
+; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm3, %ymm2
+; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vpandn %xmm0, %xmm2, %xmm0
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+vector.ph:
+  %lhs = zext <8 x i16> %x to <8 x i32>
+  %cond = icmp ult <8 x i32> %lhs, %y
+  %sub = sub <8 x i32> %lhs, %y
+  %trunc = trunc <8 x i32> %sub to <8 x i16>
+  %res = select <8 x i1> %cond, <8 x i16> zeroinitializer, <8 x i16> %trunc
+  ret <8 x i16> %res
+}
+
+define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
+; SSE2-LABEL: test14:
+; SSE2:       ## BB#0: ## %vector.ph
+; SSE2-NEXT:    movdqa %xmm0, %xmm5
+; SSE2-NEXT:    pxor %xmm0, %xmm0
+; SSE2-NEXT:    movdqa %xmm5, %xmm6
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7]
+; SSE2-NEXT:    movdqa %xmm6, %xmm8
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm0[0],xmm8[1],xmm0[1],xmm8[2],xmm0[2],xmm8[3],xmm0[3]
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
+; SSE2-NEXT:    movdqa %xmm5, %xmm10
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm0[0],xmm10[1],xmm0[1],xmm10[2],xmm0[2],xmm10[3],xmm0[3]
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT:    movdqa %xmm4, %xmm9
+; SSE2-NEXT:    pxor %xmm0, %xmm9
+; SSE2-NEXT:    psubd %xmm5, %xmm4
+; SSE2-NEXT:    pxor %xmm0, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm9, %xmm5
+; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [255,255,255,255]
+; SSE2-NEXT:    pand %xmm9, %xmm5
+; SSE2-NEXT:    movdqa %xmm3, %xmm7
+; SSE2-NEXT:    pxor %xmm0, %xmm7
+; SSE2-NEXT:    psubd %xmm10, %xmm3
+; SSE2-NEXT:    pxor %xmm0, %xmm10
+; SSE2-NEXT:    pcmpgtd %xmm7, %xmm10
+; SSE2-NEXT:    pand %xmm9, %xmm10
+; SSE2-NEXT:    packuswb %xmm5, %xmm10
+; SSE2-NEXT:    movdqa %xmm2, %xmm5
+; SSE2-NEXT:    pxor %xmm0, %xmm5
+; SSE2-NEXT:    psubd %xmm6, %xmm2
+; SSE2-NEXT:    pxor %xmm0, %xmm6
+; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
+; SSE2-NEXT:    pand %xmm9, %xmm6
+; SSE2-NEXT:    movdqa %xmm1, %xmm5
+; SSE2-NEXT:    pxor %xmm0, %xmm5
+; SSE2-NEXT:    pxor %xmm8, %xmm0
+; SSE2-NEXT:    pcmpgtd %xmm5, %xmm0
+; SSE2-NEXT:    pand %xmm9, %xmm0
+; SSE2-NEXT:    packuswb %xmm6, %xmm0
+; SSE2-NEXT:    packuswb %xmm10, %xmm0
+; SSE2-NEXT:    psubd %xmm8, %xmm1
+; SSE2-NEXT:    pand %xmm9, %xmm4
+; SSE2-NEXT:    pand %xmm9, %xmm3
+; SSE2-NEXT:    packuswb %xmm4, %xmm3
+; SSE2-NEXT:    pand %xmm9, %xmm2
+; SSE2-NEXT:    pand %xmm9, %xmm1
+; SSE2-NEXT:    packuswb %xmm2, %xmm1
+; SSE2-NEXT:    packuswb %xmm3, %xmm1
+; SSE2-NEXT:    pandn %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: test14:
+; SSSE3:       ## BB#0: ## %vector.ph
+; SSSE3-NEXT:    pxor %xmm7, %xmm7
+; SSSE3-NEXT:    movdqa %xmm0, %xmm11
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm11 = xmm11[0],xmm7[0],xmm11[1],xmm7[1],xmm11[2],xmm7[2],xmm11[3],xmm7[3],xmm11[4],xmm7[4],xmm11[5],xmm7[5],xmm11[6],xmm7[6],xmm11[7],xmm7[7]
+; SSSE3-NEXT:    movdqa %xmm11, %xmm8
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3]
+; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm11 = xmm11[4],xmm7[4],xmm11[5],xmm7[5],xmm11[6],xmm7[6],xmm11[7],xmm7[7]
+; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm7[8],xmm0[9],xmm7[9],xmm0[10],xmm7[10],xmm0[11],xmm7[11],xmm0[12],xmm7[12],xmm0[13],xmm7[13],xmm0[14],xmm7[14],xmm0[15],xmm7[15]
+; SSSE3-NEXT:    movdqa %xmm0, %xmm10
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1],xmm10[2],xmm7[2],xmm10[3],xmm7[3]
+; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7]
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
+; SSSE3-NEXT:    movdqa %xmm4, %xmm9
+; SSSE3-NEXT:    pxor %xmm7, %xmm9
+; SSSE3-NEXT:    psubd %xmm0, %xmm4
+; SSSE3-NEXT:    movdqa %xmm0, %xmm6
+; SSSE3-NEXT:    pxor %xmm7, %xmm6
+; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm6
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
+; SSSE3-NEXT:    pshufb %xmm9, %xmm6
+; SSSE3-NEXT:    movdqa %xmm3, %xmm5
+; SSSE3-NEXT:    pxor %xmm7, %xmm5
+; SSSE3-NEXT:    psubd %xmm10, %xmm3
+; SSSE3-NEXT:    movdqa %xmm10, %xmm0
+; SSSE3-NEXT:    pxor %xmm7, %xmm0
+; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm0
+; SSSE3-NEXT:    pshufb %xmm9, %xmm0
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
+; SSSE3-NEXT:    movdqa %xmm2, %xmm5
+; SSSE3-NEXT:    pxor %xmm7, %xmm5
+; SSSE3-NEXT:    psubd %xmm11, %xmm2
+; SSSE3-NEXT:    pxor %xmm7, %xmm11
+; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm11
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSSE3-NEXT:    pshufb %xmm5, %xmm11
+; SSSE3-NEXT:    movdqa %xmm1, %xmm6
+; SSSE3-NEXT:    pxor %xmm7, %xmm6
+; SSSE3-NEXT:    pxor %xmm8, %xmm7
+; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm7
+; SSSE3-NEXT:    pshufb %xmm5, %xmm7
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm7 = xmm7[0],xmm11[0],xmm7[1],xmm11[1]
+; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm7[0],xmm0[1]
+; SSSE3-NEXT:    psubd %xmm8, %xmm1
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSSE3-NEXT:    pand %xmm5, %xmm4
+; SSSE3-NEXT:    pand %xmm5, %xmm3
+; SSSE3-NEXT:    packuswb %xmm4, %xmm3
+; SSSE3-NEXT:    pand %xmm5, %xmm2
+; SSSE3-NEXT:    pand %xmm5, %xmm1
+; SSSE3-NEXT:    packuswb %xmm2, %xmm1
+; SSSE3-NEXT:    packuswb %xmm3, %xmm1
+; SSSE3-NEXT:    andnpd %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: test14:
+; SSE41:       ## BB#0: ## %vector.ph
+; SSE41-NEXT:    movdqa %xmm0, %xmm5
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[1,1,2,3]
+; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm8 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
+; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[2,3,0,1]
+; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm9 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
+; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm6 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
+; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT:    movdqa %xmm4, %xmm7
+; SSE41-NEXT:    pxor %xmm5, %xmm7
+; SSE41-NEXT:    psubd %xmm6, %xmm4
+; SSE41-NEXT:    pxor %xmm5, %xmm6
+; SSE41-NEXT:    pcmpgtd %xmm7, %xmm6
+; SSE41-NEXT:    movdqa {{.*#+}} xmm10 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
+; SSE41-NEXT:    pshufb %xmm10, %xmm6
+; SSE41-NEXT:    movdqa %xmm3, %xmm7
+; SSE41-NEXT:    pxor %xmm5, %xmm7
+; SSE41-NEXT:    psubd %xmm9, %xmm3
+; SSE41-NEXT:    pxor %xmm5, %xmm9
+; SSE41-NEXT:    pcmpgtd %xmm7, %xmm9
+; SSE41-NEXT:    pshufb %xmm10, %xmm9
+; SSE41-NEXT:    punpckldq {{.*#+}} xmm9 = xmm9[0],xmm6[0],xmm9[1],xmm6[1]
+; SSE41-NEXT:    movdqa %xmm1, %xmm6
+; SSE41-NEXT:    pxor %xmm5, %xmm6
+; SSE41-NEXT:    psubd %xmm0, %xmm1
+; SSE41-NEXT:    pxor %xmm5, %xmm0
+; SSE41-NEXT:    pcmpgtd %xmm6, %xmm0
+; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSE41-NEXT:    pshufb %xmm6, %xmm0
+; SSE41-NEXT:    movdqa %xmm2, %xmm7
+; SSE41-NEXT:    pxor %xmm5, %xmm7
+; SSE41-NEXT:    pxor %xmm8, %xmm5
+; SSE41-NEXT:    pcmpgtd %xmm7, %xmm5
+; SSE41-NEXT:    pshufb %xmm6, %xmm5
+; SSE41-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm9[4,5,6,7]
+; SSE41-NEXT:    psubd %xmm8, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE41-NEXT:    pand %xmm5, %xmm4
+; SSE41-NEXT:    pand %xmm5, %xmm3
+; SSE41-NEXT:    packuswb %xmm4, %xmm3
+; SSE41-NEXT:    pand %xmm5, %xmm1
+; SSE41-NEXT:    pand %xmm5, %xmm2
+; SSE41-NEXT:    packuswb %xmm2, %xmm1
+; SSE41-NEXT:    packuswb %xmm3, %xmm1
+; SSE41-NEXT:    pxor %xmm2, %xmm2
+; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: test14:
+; AVX1:       ## BB#0: ## %vector.ph
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm8 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm9 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm10 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
+; AVX1-NEXT:    vpxor %xmm6, %xmm7, %xmm3
+; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm4
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpxor %xmm6, %xmm2, %xmm4
+; AVX1-NEXT:    vpxor %xmm6, %xmm10, %xmm5
+; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpacksswb %xmm3, %xmm4, %xmm11
+; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm4
+; AVX1-NEXT:    vpxor %xmm6, %xmm9, %xmm5
+; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpxor %xmm6, %xmm5, %xmm3
+; AVX1-NEXT:    vpxor %xmm6, %xmm8, %xmm6
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm6, %xmm3
+; AVX1-NEXT:    vpacksswb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpacksswb %xmm11, %xmm3, %xmm3
+; AVX1-NEXT:    vpsubd %xmm8, %xmm5, %xmm4
+; AVX1-NEXT:    vpsubd %xmm9, %xmm1, %xmm1
+; AVX1-NEXT:    vpsubd %xmm10, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubd %xmm0, %xmm7, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX1-NEXT:    vpand %xmm5, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm0, %xmm2, %xmm0
+; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpackuswb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test14:
+; AVX2:       ## BB#0: ## %vector.ph
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
+; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
+; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm4
+; AVX2-NEXT:    vpxor %ymm4, %ymm1, %ymm5
+; AVX2-NEXT:    vpxor %ymm4, %ymm0, %ymm6
+; AVX2-NEXT:    vpcmpgtd %ymm5, %ymm6, %ymm5
+; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
+; AVX2-NEXT:    vpacksswb %xmm6, %xmm5, %xmm5
+; AVX2-NEXT:    vpxor %ymm4, %ymm2, %ymm6
+; AVX2-NEXT:    vpxor %ymm4, %ymm3, %ymm4
+; AVX2-NEXT:    vpcmpgtd %ymm6, %ymm4, %ymm4
+; AVX2-NEXT:    vextracti128 $1, %ymm4, %xmm6
+; AVX2-NEXT:    vpacksswb %xmm6, %xmm4, %xmm4
+; AVX2-NEXT:    vpacksswb %xmm4, %xmm5, %xmm4
+; AVX2-NEXT:    vpsubd %ymm3, %ymm2, %ymm2
+; AVX2-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
+; AVX2-NEXT:    vpshufb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufb %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+vector.ph:
+  %rhs = zext <16 x i8> %x to <16 x i32>
+  %cond = icmp ult <16 x i32> %y, %rhs
+  %sub = sub <16 x i32> %y, %rhs
+  %truncsub = trunc <16 x i32> %sub to <16 x i8>
+  %res = select <16 x i1> %cond, <16 x i8> zeroinitializer, <16 x i8> %truncsub
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @test15(<8 x i16> %x, <8 x i32> %y) nounwind {
+; SSE2-LABEL: test15:
+; SSE2:       ## BB#0: ## %vector.ph
+; SSE2-NEXT:    pxor %xmm4, %xmm4
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT:    movdqa %xmm0, %xmm5
+; SSE2-NEXT:    psubd %xmm2, %xmm0
+; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    pxor %xmm4, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm2, %xmm5
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm5[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE2-NEXT:    movdqa %xmm1, %xmm5
+; SSE2-NEXT:    pxor %xmm4, %xmm5
+; SSE2-NEXT:    pxor %xmm3, %xmm4
+; SSE2-NEXT:    pcmpgtd %xmm5, %xmm4
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm2[0]
+; SSE2-NEXT:    psubd %xmm1, %xmm3
+; SSE2-NEXT:    pslld $16, %xmm0
+; SSE2-NEXT:    psrad $16, %xmm0
+; SSE2-NEXT:    pslld $16, %xmm3
+; SSE2-NEXT:    psrad $16, %xmm3
+; SSE2-NEXT:    packssdw %xmm0, %xmm3
+; SSE2-NEXT:    pand %xmm4, %xmm3
+; SSE2-NEXT:    movdqa %xmm3, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: test15:
+; SSSE3:       ## BB#0: ## %vector.ph
+; SSSE3-NEXT:    pxor %xmm4, %xmm4
+; SSSE3-NEXT:    movdqa %xmm0, %xmm3
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSSE3-NEXT:    movdqa %xmm0, %xmm5
+; SSSE3-NEXT:    psubd %xmm2, %xmm0
+; SSSE3-NEXT:    pxor %xmm4, %xmm2
+; SSSE3-NEXT:    pxor %xmm4, %xmm5
+; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm5
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT:    pshufb %xmm2, %xmm5
+; SSSE3-NEXT:    movdqa %xmm1, %xmm6
+; SSSE3-NEXT:    pxor %xmm4, %xmm6
+; SSSE3-NEXT:    pxor %xmm3, %xmm4
+; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm4
+; SSSE3-NEXT:    pshufb %xmm2, %xmm4
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; SSSE3-NEXT:    psubd %xmm1, %xmm3
+; SSSE3-NEXT:    pshufb %xmm2, %xmm0
+; SSSE3-NEXT:    pshufb %xmm2, %xmm3
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSSE3-NEXT:    pand %xmm4, %xmm3
+; SSSE3-NEXT:    movdqa %xmm3, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: test15:
+; SSE41:       ## BB#0: ## %vector.ph
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT:    movdqa %xmm0, %xmm5
+; SSE41-NEXT:    psubd %xmm1, %xmm0
+; SSE41-NEXT:    pxor %xmm4, %xmm1
+; SSE41-NEXT:    pxor %xmm4, %xmm5
+; SSE41-NEXT:    pcmpgtd %xmm1, %xmm5
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT:    pshufb %xmm1, %xmm5
+; SSE41-NEXT:    movdqa %xmm2, %xmm6
+; SSE41-NEXT:    pxor %xmm4, %xmm6
+; SSE41-NEXT:    pxor %xmm3, %xmm4
+; SSE41-NEXT:    pcmpgtd %xmm6, %xmm4
+; SSE41-NEXT:    pshufb %xmm1, %xmm4
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
+; SSE41-NEXT:    psubd %xmm2, %xmm3
+; SSE41-NEXT:    pshufb %xmm1, %xmm0
+; SSE41-NEXT:    pshufb %xmm1, %xmm3
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; SSE41-NEXT:    pand %xmm5, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: test15:
+; AVX1:       ## BB#0: ## %vector.ph
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm5
+; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpxor %xmm3, %xmm5, %xmm6
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm3, %xmm3
+; AVX1-NEXT:    vpacksswb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpsubd %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT:    vpand %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test15:
+; AVX2:       ## BB#0: ## %vector.ph
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
 ; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm3
 ; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 ; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-NEXT:    vpandn %xmm0, %xmm2, %xmm0
-; AVX2-NEXT:    vmovdqu %xmm0, (%rdi)
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
-vector.ph:
-  %0 = getelementptr inbounds i16, i16* %head, i64 0
-  %1 = bitcast i16* %0 to <8 x i16>*
-  %2 = load <8 x i16>, <8 x i16>* %1, align 2
-  %3 = getelementptr inbounds i32, i32* %w, i64 0
-  %4 = bitcast i32* %3 to <8 x i32>*
-  %5 = load <8 x i32>, <8 x i32>* %4, align 2
-  %6 = zext <8 x i16> %2 to <8 x i32>
-  %7 = icmp ult <8 x i32> %6, %5
-  %8 = sub <8 x i32> %6, %5
-  %9 = trunc <8 x i32> %8 to <8 x i16>
-  %10 = select <8 x i1> %7, <8 x i16> zeroinitializer, <8 x i16> %9
-  store <8 x i16> %10, <8 x i16>* %1, align 1
-  ret void
-}
-
-define void @test14(i8* nocapture %head, i32* nocapture %w) nounwind {
-; SSE2-LABEL: test14:
-; SSE2:       ## BB#0: ## %vector.ph
-; SSE2-NEXT:    movdqu (%rdi), %xmm0
-; SSE2-NEXT:    movdqu (%rsi), %xmm8
-; SSE2-NEXT:    movdqu 16(%rsi), %xmm9
-; SSE2-NEXT:    movdqu 32(%rsi), %xmm10
-; SSE2-NEXT:    movdqu 48(%rsi), %xmm7
-; SSE2-NEXT:    pxor %xmm3, %xmm3
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15]
-; SSE2-NEXT:    movdqa %xmm0, %xmm6
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3]
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT:    movdqa %xmm0, %xmm5
-; SSE2-NEXT:    psubd %xmm7, %xmm0
-; SSE2-NEXT:    pxor %xmm3, %xmm7
-; SSE2-NEXT:    pxor %xmm3, %xmm5
-; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255]
-; SSE2-NEXT:    pand %xmm5, %xmm7
-; SSE2-NEXT:    movdqa %xmm6, %xmm4
-; SSE2-NEXT:    psubd %xmm10, %xmm6
-; SSE2-NEXT:    pxor %xmm3, %xmm10
-; SSE2-NEXT:    pxor %xmm3, %xmm4
-; SSE2-NEXT:    pcmpgtd %xmm4, %xmm10
-; SSE2-NEXT:    pand %xmm5, %xmm10
-; SSE2-NEXT:    packuswb %xmm7, %xmm10
-; SSE2-NEXT:    movdqa %xmm1, %xmm4
-; SSE2-NEXT:    psubd %xmm9, %xmm1
-; SSE2-NEXT:    pxor %xmm3, %xmm9
-; SSE2-NEXT:    pxor %xmm3, %xmm4
-; SSE2-NEXT:    pcmpgtd %xmm4, %xmm9
-; SSE2-NEXT:    pand %xmm5, %xmm9
-; SSE2-NEXT:    movdqa %xmm8, %xmm4
-; SSE2-NEXT:    pxor %xmm3, %xmm4
-; SSE2-NEXT:    pxor %xmm2, %xmm3
-; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
-; SSE2-NEXT:    pand %xmm5, %xmm4
-; SSE2-NEXT:    packuswb %xmm9, %xmm4
-; SSE2-NEXT:    packuswb %xmm10, %xmm4
-; SSE2-NEXT:    psubd %xmm8, %xmm2
-; SSE2-NEXT:    pand %xmm5, %xmm0
-; SSE2-NEXT:    pand %xmm5, %xmm6
-; SSE2-NEXT:    packuswb %xmm0, %xmm6
-; SSE2-NEXT:    pand %xmm5, %xmm1
-; SSE2-NEXT:    pand %xmm5, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
-; SSE2-NEXT:    packuswb %xmm6, %xmm2
-; SSE2-NEXT:    pandn %xmm2, %xmm4
-; SSE2-NEXT:    movdqu %xmm4, (%rdi)
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: test14:
-; SSSE3:       ## BB#0: ## %vector.ph
-; SSSE3-NEXT:    movdqu (%rdi), %xmm0
-; SSSE3-NEXT:    movdqu (%rsi), %xmm8
-; SSSE3-NEXT:    movdqu 16(%rsi), %xmm9
-; SSSE3-NEXT:    movdqu 32(%rsi), %xmm10
-; SSSE3-NEXT:    movdqu 48(%rsi), %xmm7
-; SSSE3-NEXT:    pxor %xmm3, %xmm3
-; SSSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSSE3-NEXT:    movdqa %xmm1, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
-; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm6
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3]
-; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm5
-; SSSE3-NEXT:    psubd %xmm7, %xmm0
-; SSSE3-NEXT:    pxor %xmm3, %xmm7
-; SSSE3-NEXT:    pxor %xmm3, %xmm5
-; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm7
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
-; SSSE3-NEXT:    pshufb %xmm5, %xmm7
-; SSSE3-NEXT:    movdqa %xmm6, %xmm4
-; SSSE3-NEXT:    psubd %xmm10, %xmm6
-; SSSE3-NEXT:    pxor %xmm3, %xmm10
-; SSSE3-NEXT:    pxor %xmm3, %xmm4
-; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm10
-; SSSE3-NEXT:    pshufb %xmm5, %xmm10
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
-; SSSE3-NEXT:    movdqa %xmm1, %xmm4
-; SSSE3-NEXT:    psubd %xmm9, %xmm1
-; SSSE3-NEXT:    pxor %xmm3, %xmm9
-; SSSE3-NEXT:    pxor %xmm3, %xmm4
-; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm9
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
-; SSSE3-NEXT:    pshufb %xmm4, %xmm9
-; SSSE3-NEXT:    movdqa %xmm8, %xmm5
-; SSSE3-NEXT:    pxor %xmm3, %xmm5
-; SSSE3-NEXT:    pxor %xmm2, %xmm3
-; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm5
-; SSSE3-NEXT:    pshufb %xmm4, %xmm5
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1]
-; SSSE3-NEXT:    movsd {{.*#+}} xmm10 = xmm5[0],xmm10[1]
-; SSSE3-NEXT:    psubd %xmm8, %xmm2
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSSE3-NEXT:    pand %xmm3, %xmm0
-; SSSE3-NEXT:    pand %xmm3, %xmm6
-; SSSE3-NEXT:    packuswb %xmm0, %xmm6
-; SSSE3-NEXT:    pand %xmm3, %xmm1
-; SSSE3-NEXT:    pand %xmm3, %xmm2
-; SSSE3-NEXT:    packuswb %xmm1, %xmm2
-; SSSE3-NEXT:    packuswb %xmm6, %xmm2
-; SSSE3-NEXT:    andnpd %xmm2, %xmm10
-; SSSE3-NEXT:    movupd %xmm10, (%rdi)
-; SSSE3-NEXT:    retq
-;
-; AVX1-LABEL: test14:
-; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovdqu (%rsi), %ymm0
-; AVX1-NEXT:    vmovdqu 32(%rsi), %ymm1
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm8 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm9 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm10 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
-; AVX1-NEXT:    vpxor %xmm6, %xmm5, %xmm7
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpxor %xmm6, %xmm2, %xmm3
-; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm3, %xmm3
-; AVX1-NEXT:    vpxor %xmm6, %xmm10, %xmm7
-; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm4
-; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm4, %xmm4
-; AVX1-NEXT:    vpacksswb %xmm3, %xmm4, %xmm11
-; AVX1-NEXT:    vpxor %xmm6, %xmm9, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT:    vpxor %xmm6, %xmm7, %xmm3
-; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vpxor %xmm6, %xmm8, %xmm4
-; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm6
-; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vpacksswb %xmm3, %xmm4, %xmm3
-; AVX1-NEXT:    vpacksswb %xmm11, %xmm3, %xmm3
-; AVX1-NEXT:    vpsubd %xmm0, %xmm8, %xmm0
-; AVX1-NEXT:    vpsubd %xmm7, %xmm9, %xmm4
-; AVX1-NEXT:    vpsubd %xmm1, %xmm10, %xmm1
-; AVX1-NEXT:    vpsubd %xmm2, %xmm5, %xmm2
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX1-NEXT:    vpand %xmm5, %xmm2, %xmm2
-; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm2
-; AVX1-NEXT:    vpand %xmm5, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: test14:
-; AVX2:       ## BB#0: ## %vector.ph
-; AVX2-NEXT:    vmovdqu (%rsi), %ymm0
-; AVX2-NEXT:    vmovdqu 32(%rsi), %ymm1
-; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
-; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
-; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm4
-; AVX2-NEXT:    vpxor %ymm4, %ymm3, %ymm5
-; AVX2-NEXT:    vpxor %ymm4, %ymm1, %ymm6
-; AVX2-NEXT:    vpcmpgtd %ymm5, %ymm6, %ymm5
-; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
-; AVX2-NEXT:    vpacksswb %xmm6, %xmm5, %xmm5
-; AVX2-NEXT:    vpxor %ymm4, %ymm2, %ymm6
-; AVX2-NEXT:    vpxor %ymm4, %ymm0, %ymm4
-; AVX2-NEXT:    vpcmpgtd %ymm6, %ymm4, %ymm4
-; AVX2-NEXT:    vextracti128 $1, %ymm4, %xmm6
-; AVX2-NEXT:    vpacksswb %xmm6, %xmm4, %xmm4
-; AVX2-NEXT:    vpacksswb %xmm5, %xmm4, %xmm4
-; AVX2-NEXT:    vpsubd %ymm0, %ymm2, %ymm0
-; AVX2-NEXT:    vpsubd %ymm1, %ymm3, %ymm1
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
-; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqu %xmm0, (%rdi)
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
-vector.ph:
-  %0 = getelementptr inbounds i8, i8* %head, i64 0
-  %1 = bitcast i8* %0 to <16 x i8>*
-  %2 = load <16 x i8>, <16 x i8>* %1, align 2
-  %3 = getelementptr inbounds i32, i32* %w, i64 0
-  %4 = bitcast i32* %3 to <16 x i32>*
-  %5 = load <16 x i32>, <16 x i32>* %4, align 2
-  %6 = zext <16 x i8> %2 to <16 x i32>
-  %7 = icmp ult <16 x i32> %6, %5
-  %8 = sub <16 x i32> %6, %5
-  %9 = trunc <16 x i32> %8 to <16 x i8>
-  %10 = select <16 x i1> %7, <16 x i8> zeroinitializer, <16 x i8> %9
-  store <16 x i8> %10, <16 x i8>* %1, align 1
-  ret void
-}
-
-define void @test15(i16* nocapture %head, i32* nocapture %w) nounwind {
-; SSE2-LABEL: test15:
-; SSE2:       ## BB#0: ## %vector.ph
-; SSE2-NEXT:    movdqu (%rdi), %xmm0
-; SSE2-NEXT:    movdqu (%rsi), %xmm2
-; SSE2-NEXT:    movdqu 16(%rsi), %xmm3
-; SSE2-NEXT:    pxor %xmm4, %xmm4
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT:    movdqa %xmm0, %xmm5
-; SSE2-NEXT:    psubd %xmm3, %xmm0
-; SSE2-NEXT:    pxor %xmm4, %xmm3
-; SSE2-NEXT:    pxor %xmm4, %xmm5
-; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm5[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
-; SSE2-NEXT:    movdqa %xmm2, %xmm5
-; SSE2-NEXT:    pxor %xmm4, %xmm5
-; SSE2-NEXT:    pxor %xmm1, %xmm4
-; SSE2-NEXT:    pcmpgtd %xmm5, %xmm4
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm3[0]
-; SSE2-NEXT:    psubd %xmm2, %xmm1
-; SSE2-NEXT:    pslld $16, %xmm0
-; SSE2-NEXT:    psrad $16, %xmm0
-; SSE2-NEXT:    pslld $16, %xmm1
-; SSE2-NEXT:    psrad $16, %xmm1
-; SSE2-NEXT:    packssdw %xmm0, %xmm1
-; SSE2-NEXT:    pand %xmm4, %xmm1
-; SSE2-NEXT:    movdqu %xmm1, (%rdi)
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: test15:
-; SSSE3:       ## BB#0: ## %vector.ph
-; SSSE3-NEXT:    movdqu (%rdi), %xmm0
-; SSSE3-NEXT:    movdqu (%rsi), %xmm2
-; SSSE3-NEXT:    movdqu 16(%rsi), %xmm4
-; SSSE3-NEXT:    pxor %xmm3, %xmm3
-; SSSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm5
-; SSSE3-NEXT:    psubd %xmm4, %xmm0
-; SSSE3-NEXT:    pxor %xmm3, %xmm4
-; SSSE3-NEXT:    pxor %xmm3, %xmm5
-; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm5
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSSE3-NEXT:    pshufb %xmm4, %xmm5
-; SSSE3-NEXT:    movdqa %xmm2, %xmm6
-; SSSE3-NEXT:    pxor %xmm3, %xmm6
-; SSSE3-NEXT:    pxor %xmm1, %xmm3
-; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm3
-; SSSE3-NEXT:    pshufb %xmm4, %xmm3
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
-; SSSE3-NEXT:    psubd %xmm2, %xmm1
-; SSSE3-NEXT:    pshufb %xmm4, %xmm0
-; SSSE3-NEXT:    pshufb %xmm4, %xmm1
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSSE3-NEXT:    pand %xmm3, %xmm1
-; SSSE3-NEXT:    movdqu %xmm1, (%rdi)
-; SSSE3-NEXT:    retq
-;
-; AVX1-LABEL: test15:
-; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovdqu (%rsi), %ymm0
-; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
-; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
-; AVX1-NEXT:    vpxor %xmm3, %xmm5, %xmm6
-; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm4, %xmm4
-; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm6
-; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
-; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm6, %xmm3
-; AVX1-NEXT:    vpacksswb %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpsubd %xmm5, %xmm2, %xmm1
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX1-NEXT:    vpand %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: test15:
-; AVX2:       ## BB#0: ## %vector.ph
-; AVX2-NEXT:    vmovdqu (%rsi), %ymm0
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
-; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
-; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm2
-; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm2
-; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
-; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT:    vpand %xmm0, %xmm2, %xmm0
-; AVX2-NEXT:    vmovdqu %xmm0, (%rdi)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 vector.ph:
-  %0 = getelementptr inbounds i16, i16* %head, i64 0
-  %1 = bitcast i16* %0 to <8 x i16>*
-  %2 = load <8 x i16>, <8 x i16>* %1, align 2
-  %3 = getelementptr inbounds i32, i32* %w, i64 0
-  %4 = bitcast i32* %3 to <8 x i32>*
-  %5 = load <8 x i32>, <8 x i32>* %4, align 2
-  %6 = zext <8 x i16> %2 to <8 x i32>
-  %7 = icmp ugt <8 x i32> %6, %5
-  %8 = sub <8 x i32> %6, %5
-  %9 = trunc <8 x i32> %8 to <8 x i16>
-  %10 = select <8 x i1> %7, <8 x i16> %9, <8 x i16> zeroinitializer
-  store <8 x i16> %10, <8 x i16>* %1, align 1
-  ret void
+  %lhs = zext <8 x i16> %x to <8 x i32>
+  %cond = icmp ugt <8 x i32> %lhs, %y
+  %sub = sub <8 x i32> %lhs, %y
+  %truncsub = trunc <8 x i32> %sub to <8 x i16>
+  %res = select <8 x i1> %cond, <8 x i16> %truncsub, <8 x i16> zeroinitializer
+  ret <8 x i16> %res
 }
 
-define void @test16(i16* nocapture %head, i32* nocapture %w) nounwind {
+define <8 x i16> @test16(<8 x i16> %x, <8 x i32> %y) nounwind {
 ; SSE2-LABEL: test16:
 ; SSE2:       ## BB#0: ## %vector.ph
-; SSE2-NEXT:    movdqu (%rdi), %xmm0
-; SSE2-NEXT:    movdqu (%rsi), %xmm2
-; SSE2-NEXT:    movdqu 16(%rsi), %xmm3
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm5
-; SSE2-NEXT:    psubd %xmm3, %xmm0
-; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    psubd %xmm2, %xmm0
+; SSE2-NEXT:    pxor %xmm4, %xmm2
 ; SSE2-NEXT:    pxor %xmm4, %xmm5
-; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm5[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
-; SSE2-NEXT:    movdqa %xmm2, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm2, %xmm5
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm5[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE2-NEXT:    movdqa %xmm1, %xmm5
 ; SSE2-NEXT:    pxor %xmm4, %xmm5
-; SSE2-NEXT:    pxor %xmm1, %xmm4
+; SSE2-NEXT:    pxor %xmm3, %xmm4
 ; SSE2-NEXT:    pcmpgtd %xmm5, %xmm4
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,6,6,7]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm3[0]
-; SSE2-NEXT:    psubd %xmm2, %xmm1
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm2[0]
+; SSE2-NEXT:    psubd %xmm1, %xmm3
 ; SSE2-NEXT:    pslld $16, %xmm0
 ; SSE2-NEXT:    psrad $16, %xmm0
-; SSE2-NEXT:    pslld $16, %xmm1
-; SSE2-NEXT:    psrad $16, %xmm1
-; SSE2-NEXT:    packssdw %xmm0, %xmm1
-; SSE2-NEXT:    pand %xmm4, %xmm1
-; SSE2-NEXT:    movdqu %xmm1, (%rdi)
+; SSE2-NEXT:    pslld $16, %xmm3
+; SSE2-NEXT:    psrad $16, %xmm3
+; SSE2-NEXT:    packssdw %xmm0, %xmm3
+; SSE2-NEXT:    pand %xmm4, %xmm3
+; SSE2-NEXT:    movdqa %xmm3, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test16:
 ; SSSE3:       ## BB#0: ## %vector.ph
-; SSSE3-NEXT:    movdqu (%rdi), %xmm0
-; SSSE3-NEXT:    movdqu (%rsi), %xmm2
-; SSSE3-NEXT:    movdqu 16(%rsi), %xmm4
-; SSSE3-NEXT:    pxor %xmm3, %xmm3
-; SSSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; SSSE3-NEXT:    pxor %xmm4, %xmm4
+; SSSE3-NEXT:    movdqa %xmm0, %xmm3
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
 ; SSSE3-NEXT:    movdqa %xmm0, %xmm5
-; SSSE3-NEXT:    psubd %xmm4, %xmm0
+; SSSE3-NEXT:    psubd %xmm2, %xmm0
+; SSSE3-NEXT:    pxor %xmm4, %xmm2
+; SSSE3-NEXT:    pxor %xmm4, %xmm5
+; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm5
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT:    pshufb %xmm2, %xmm5
+; SSSE3-NEXT:    movdqa %xmm1, %xmm6
+; SSSE3-NEXT:    pxor %xmm4, %xmm6
 ; SSSE3-NEXT:    pxor %xmm3, %xmm4
-; SSSE3-NEXT:    pxor %xmm3, %xmm5
-; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm5
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSSE3-NEXT:    pshufb %xmm4, %xmm5
-; SSSE3-NEXT:    movdqa %xmm2, %xmm6
-; SSSE3-NEXT:    pxor %xmm3, %xmm6
-; SSSE3-NEXT:    pxor %xmm1, %xmm3
-; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm3
-; SSSE3-NEXT:    pshufb %xmm4, %xmm3
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
-; SSSE3-NEXT:    psubd %xmm2, %xmm1
-; SSSE3-NEXT:    pshufb %xmm4, %xmm0
-; SSSE3-NEXT:    pshufb %xmm4, %xmm1
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSSE3-NEXT:    pand %xmm3, %xmm1
-; SSSE3-NEXT:    movdqu %xmm1, (%rdi)
+; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm4
+; SSSE3-NEXT:    pshufb %xmm2, %xmm4
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; SSSE3-NEXT:    psubd %xmm1, %xmm3
+; SSSE3-NEXT:    pshufb %xmm2, %xmm0
+; SSSE3-NEXT:    pshufb %xmm2, %xmm3
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSSE3-NEXT:    pand %xmm4, %xmm3
+; SSSE3-NEXT:    movdqa %xmm3, %xmm0
 ; SSSE3-NEXT:    retq
 ;
+; SSE41-LABEL: test16:
+; SSE41:       ## BB#0: ## %vector.ph
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT:    movdqa %xmm0, %xmm5
+; SSE41-NEXT:    psubd %xmm1, %xmm0
+; SSE41-NEXT:    pxor %xmm4, %xmm1
+; SSE41-NEXT:    pxor %xmm4, %xmm5
+; SSE41-NEXT:    pcmpgtd %xmm1, %xmm5
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT:    pshufb %xmm1, %xmm5
+; SSE41-NEXT:    movdqa %xmm2, %xmm6
+; SSE41-NEXT:    pxor %xmm4, %xmm6
+; SSE41-NEXT:    pxor %xmm3, %xmm4
+; SSE41-NEXT:    pcmpgtd %xmm6, %xmm4
+; SSE41-NEXT:    pshufb %xmm1, %xmm4
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
+; SSE41-NEXT:    psubd %xmm2, %xmm3
+; SSE41-NEXT:    pshufb %xmm1, %xmm0
+; SSE41-NEXT:    pshufb %xmm1, %xmm3
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; SSE41-NEXT:    pand %xmm5, %xmm0
+; SSE41-NEXT:    retq
+;
 ; AVX1-LABEL: test16:
 ; AVX1:       ## BB#0: ## %vector.ph
-; AVX1-NEXT:    vmovdqu (%rsi), %ymm0
-; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
-; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm5
+; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpxor %xmm3, %xmm5, %xmm6
-; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm4, %xmm4
-; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm6
-; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
-; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm6, %xmm3
-; AVX1-NEXT:    vpacksswb %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpsubd %xmm5, %xmm2, %xmm1
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm3, %xmm3
+; AVX1-NEXT:    vpacksswb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpsubd %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1-NEXT:    vpand %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test16:
 ; AVX2:       ## BB#0: ## %vector.ph
-; AVX2-NEXT:    vmovdqu (%rsi), %ymm0
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
-; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
-; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm2
+; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm3
+; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 ; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT:    vpand %xmm0, %xmm2, %xmm0
-; AVX2-NEXT:    vmovdqu %xmm0, (%rdi)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 vector.ph:
-  %0 = getelementptr inbounds i16, i16* %head, i64 0
-  %1 = bitcast i16* %0 to <8 x i16>*
-  %2 = load <8 x i16>, <8 x i16>* %1, align 2
-  %3 = getelementptr inbounds i32, i32* %w, i64 0
-  %4 = bitcast i32* %3 to <8 x i32>*
-  %5 = load <8 x i32>, <8 x i32>* %4, align 2
-  %6 = zext <8 x i16> %2 to <8 x i32>
-  %7 = icmp ult <8 x i32> %5, %6
-  %8 = sub <8 x i32> %6, %5
-  %9 = trunc <8 x i32> %8 to <8 x i16>
-  %10 = select <8 x i1> %7, <8 x i16> %9, <8 x i16> zeroinitializer
-  store <8 x i16> %10, <8 x i16>* %1, align 1
-  ret void
+  %lhs = zext <8 x i16> %x to <8 x i32>
+  %cond = icmp ult <8 x i32> %y, %lhs
+  %sub = sub <8 x i32> %lhs, %y
+  %truncsub = trunc <8 x i32> %sub to <8 x i16>
+  %res = select <8 x i1> %cond, <8 x i16> %truncsub, <8 x i16> zeroinitializer
+  ret <8 x i16> %res
 }
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index 16f152d169d3..5e9e1e364fef 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -134,10 +134,7 @@ entry:
 @g_16 = internal global i32 -1
 
 ; X64-LABEL: test8:
-; X64-NEXT: movl _g_16(%rip), %eax
-; X64-NEXT: movl $0, _g_16(%rip)
-; X64-NEXT: orl  $1, %eax
-; X64-NEXT: movl %eax, _g_16(%rip)
+; X64-NEXT: orb  $1, _g_16(%rip)
 ; X64-NEXT: ret
 define void @test8() nounwind {
   %tmp = load i32, i32* @g_16
diff --git a/test/CodeGen/X86/swift-return.ll b/test/CodeGen/X86/swift-return.ll
index 60e33e62b4ad..0ea176d5d82f 100644
--- a/test/CodeGen/X86/swift-return.ll
+++ b/test/CodeGen/X86/swift-return.ll
@@ -184,11 +184,11 @@ define void @consume_i1_ret() {
   %v6 = extractvalue { i1, i1, i1, i1 } %call, 2
   %v7 = extractvalue { i1, i1, i1, i1 } %call, 3
   %val = zext i1 %v3 to i32
-  store i32 %val, i32* @var
+  store volatile i32 %val, i32* @var
   %val2 = zext i1 %v5 to i32
-  store i32 %val2, i32* @var
+  store volatile i32 %val2, i32* @var
   %val3 = zext i1 %v6 to i32
-  store i32 %val3, i32* @var
+  store volatile i32 %val3, i32* @var
   %val4 = zext i1 %v7 to i32
   store i32 %val4, i32* @var
   ret void
diff --git a/test/CodeGen/X86/win32-spill-xmm.ll b/test/CodeGen/X86/win32-spill-xmm.ll
index 0db97cfe20f0..c6b163b88b24 100644
--- a/test/CodeGen/X86/win32-spill-xmm.ll
+++ b/test/CodeGen/X86/win32-spill-xmm.ll
@@ -20,7 +20,7 @@ declare void @bar(<16 x float> %a, i32 %b)
 ; Check that proper alignment of spilled vector does not affect vargs
 
 ; CHECK-LABEL: vargs_not_affected
-; CHECK: leal    28(%ebp), %eax
+; CHECK: movl 28(%ebp), %eax
 define i32 @vargs_not_affected(<4 x float> %v, i8* %f, ...) {
 entry:
   %ap = alloca i8*, align 4
diff --git a/test/CodeGen/X86/win64_sibcall.ll b/test/CodeGen/X86/win64_sibcall.ll
index 4bba0e1e0acd..42dd4d31ca9f 100644
--- a/test/CodeGen/X86/win64_sibcall.ll
+++ b/test/CodeGen/X86/win64_sibcall.ll
@@ -12,8 +12,8 @@ entry:
 ; LINUX:	movq	$0, -8(%rsp)
 
   %this = alloca %Object addrspace(1)*
-  store %Object addrspace(1)* null, %Object addrspace(1)** %this
-  store %Object addrspace(1)* %param0, %Object addrspace(1)** %this
+  store volatile %Object addrspace(1)* null, %Object addrspace(1)** %this
+  store volatile %Object addrspace(1)* %param0, %Object addrspace(1)** %this
   br label %0
 
 ; <label>:0                                       ; preds = %entry
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
index 8d7f2010a541..20386bf36395 100644
--- a/test/CodeGen/X86/win64_vararg.ll
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -94,9 +94,7 @@ entry:
 
 ; CHECK-LABEL: arg4:
 ; CHECK: pushq
-; va_start:
-; CHECK: leaq 48(%rsp), [[REG_arg4_1:%[a-z]+]]
-; CHECK: movq [[REG_arg4_1]], (%rsp)
+; va_start (optimized away as overwritten by va_arg)
 ; va_arg:
 ; CHECK: leaq 52(%rsp), [[REG_arg4_2:%[a-z]+]]
 ; CHECK: movq [[REG_arg4_2]], (%rsp)
diff --git a/test/CodeGen/X86/x86-64-ms_abi-vararg.ll b/test/CodeGen/X86/x86-64-ms_abi-vararg.ll
index e3436521a5bd..299190e8a595 100644
--- a/test/CodeGen/X86/x86-64-ms_abi-vararg.ll
+++ b/test/CodeGen/X86/x86-64-ms_abi-vararg.ll
@@ -90,9 +90,7 @@ entry:
 }
 
 ; CHECK-LABEL: arg4:
-; va_start:
-; CHECK: leaq 48(%rsp), [[REG_arg4_1:%[a-z]+]]
-; CHECK: movq [[REG_arg4_1]], (%rsp)
+; va_start (optimized away as overwritten by va_arg)
 ; va_arg:
 ; CHECK: leaq 52(%rsp), [[REG_arg4_2:%[a-z]+]]
 ; CHECK: movq [[REG_arg4_2]], (%rsp)
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/ELF_x86-64_debug_frame.s b/test/ExecutionEngine/RuntimeDyld/X86/ELF_x86-64_debug_frame.s
new file mode 100644
index 000000000000..8f907a6c4991
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/ELF_x86-64_debug_frame.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj -o %T/ELF_x86-64_debug_frame.o %s
+# RUN: llvm-rtdyld -triple=x86_64-pc-linux -verify -check=%s %T/ELF_x86-64_debug_frame.o
+
+        .text
+        .file   "debug_frame_test.c"
+        .align  16, 0x90
+        .type   foo,@function
+foo:
+        .cfi_startproc
+        retq
+.Ltmp0:
+        .size   foo, .Ltmp0-foo
+        .cfi_endproc
+        .cfi_sections .debug_frame
+
+# Check that .debug_frame is mapped to 0.
+# rtdyld-check: section_addr(ELF_x86-64_debug_frame.o, .debug_frame) = 0
+
+# Check that the relocated FDE's CIE offset also points to zero.
+# rtdyld-check: *{4}(section_addr(ELF_x86-64_debug_frame.o, .debug_frame) + 0x1C) = 0
diff --git a/test/Feature/optnone-llc.ll b/test/Feature/optnone-llc.ll
index 69dc5291226a..2129fc9b8815 100644
--- a/test/Feature/optnone-llc.ll
+++ b/test/Feature/optnone-llc.ll
@@ -42,6 +42,7 @@ attributes #0 = { optnone noinline }
 ; LLC-Ox-DAG: Skipping pass 'Control Flow Optimizer'
 ; LLC-Ox-DAG: Skipping pass 'Machine code sinking'
 ; LLC-Ox-DAG: Skipping pass 'Machine Common Subexpression Elimination'
+; LLC-Ox-DAG: Skipping pass 'Shrink Wrapping analysis'
 ; LLC-Ox-DAG: Skipping pass 'Machine Copy Propagation Pass'
 ; LLC-Ox-DAG: Skipping pass 'Machine Instruction Scheduler'
 ; LLC-Ox-DAG: Skipping pass 'Machine Loop Invariant Code Motion'
diff --git a/test/MC/AMDGPU/vop3-gfx9.s b/test/MC/AMDGPU/vop3-gfx9.s
index 22a0cddceab4..f50d9248e738 100644
--- a/test/MC/AMDGPU/vop3-gfx9.s
+++ b/test/MC/AMDGPU/vop3-gfx9.s
@@ -35,6 +35,30 @@ v_xad_u32 v1, v2, v3, v4
 // GFX9: v_xad_u32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf3,0xd1,0x02,0x07,0x12,0x04]
 // NOVI: :1: error: instruction not supported on this GPU
 
+v_min3_f16 v1, v2, v3, v4
+// GFX9: v_min3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf4,0xd1,0x02,0x07,0x12,0x04]
+// NOVI: :1: error: instruction not supported on this GPU
+
+v_min3_i16 v1, v2, v3, v4
+// GFX9: v_min3_i16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf5,0xd1,0x02,0x07,0x12,0x04]
+// NOVI: :1: error: instruction not supported on this GPU
+
+v_min3_u16 v1, v2, v3, v4
+// GFX9: v_min3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf6,0xd1,0x02,0x07,0x12,0x04]
+// NOVI: :1: error: instruction not supported on this GPU
+
+v_max3_f16 v1, v2, v3, v4
+// GFX9: v_max3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf7,0xd1,0x02,0x07,0x12,0x04]
+// NOVI: :1: error: instruction not supported on this GPU
+
+v_max3_i16 v1, v2, v3, v4
+// GFX9: v_max3_i16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf8,0xd1,0x02,0x07,0x12,0x04]
+// NOVI: :1: error: instruction not supported on this GPU
+
+v_max3_u16 v1, v2, v3, v4
+// GFX9: v_max3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf9,0xd1,0x02,0x07,0x12,0x04]
+// NOVI: :1: error: instruction not supported on this GPU
+
 v_med3_f16 v1, v2, v3, v4
 // GFX9: v_med3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfa,0xd1,0x02,0x07,0x12,0x04]
 // NOVI: :1: error: instruction not supported on this GPU
diff --git a/test/TableGen/GlobalISelEmitter.td b/test/TableGen/GlobalISelEmitter.td
index 9f89602ae4ad..2784e937954a 100644
--- a/test/TableGen/GlobalISelEmitter.td
+++ b/test/TableGen/GlobalISelEmitter.td
@@ -7,6 +7,10 @@ include "llvm/Target/Target.td"
 def MyTargetISA : InstrInfo;
 def MyTarget : Target { let InstructionSet = MyTargetISA; }
 
+let TargetPrefix = "mytarget" in {
+def int_mytarget_nop : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+}
+
 def R0 : Register<"r0"> { let Namespace = "MyTarget"; }
 def GPR32 : RegisterClass<"MyTarget", [i32], 32, (add R0)>;
 def GPR32Op : RegisterOperand<GPR32>;
@@ -127,6 +131,37 @@ def : Pat<(select GPR32:$src1, complex:$src2, complex:$src3),
 def ADD : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2),
             [(set GPR32:$dst, (add GPR32:$src1, GPR32:$src2))]>;
 
+//===- Test a simple pattern with an intrinsic. ---------------------------===//
+//
+
+// CHECK-LABEL: if ([&]() {
+// CHECK-NEXT:    MachineInstr &MI0 = I;
+// CHECK-NEXT:    if (MI0.getNumOperands() < 3)
+// CHECK-NEXT:      return false;
+// CHECK-NEXT:    if ((MI0.getOpcode() == TargetOpcode::G_INTRINSIC) &&
+// CHECK-NEXT:        ((/* dst */ (MRI.getType(MI0.getOperand(0).getReg()) == (LLT::scalar(32))) &&
+// CHECK-NEXT:         ((&RBI.getRegBankFromRegClass(MyTarget::GPR32RegClass) == RBI.getRegBank(MI0.getOperand(0).getReg(), MRI, TRI))))) &&
+// CHECK-NEXT:        ((/* Operand 1 */ (isOperandImmEqual(MI0.getOperand(1), [[ID:[0-9]+]], MRI)))) &&
+// CHECK-NEXT:        ((/* src1 */ (MRI.getType(MI0.getOperand(2).getReg()) == (LLT::scalar(32))) &&
+// CHECK-NEXT:         ((&RBI.getRegBankFromRegClass(MyTarget::GPR32RegClass) == RBI.getRegBank(MI0.getOperand(2).getReg(), MRI, TRI)))))) {
+// CHECK-NEXT:      // (intrinsic_wo_chain:i32 [[ID]]:iPTR, GPR32:i32:$src1) => (MOV:i32 GPR32:i32:$src1)
+// CHECK-NEXT:      MachineInstrBuilder MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(MyTarget::MOV));
+// CHECK-NEXT:      MIB.add(MI0.getOperand(0)/*dst*/);
+// CHECK-NEXT:      MIB.add(MI0.getOperand(2)/*src1*/);
+// CHECK-NEXT:      for (const auto *FromMI : {&MI0, })
+// CHECK-NEXT:        for (const auto &MMO : FromMI->memoperands())
+// CHECK-NEXT:          MIB.addMemOperand(MMO);
+// CHECK-NEXT:      I.eraseFromParent();
+// CHECK-NEXT:      MachineInstr &NewI = *MIB;
+// CHECK-NEXT:      constrainSelectedInstRegOperands(NewI, TII, TRI, RBI);
+// CHECK-NEXT:      return true;
+// CHECK-NEXT:    }
+// CHECK-NEXT:    return false;
+// CHECK-NEXT:  }()) { return true; }
+
+def MOV : I<(outs GPR32:$dst), (ins GPR32:$src1),
+            [(set GPR32:$dst, (int_mytarget_nop GPR32:$src1))]>;
+
 //===- Test a nested instruction match. -----------------------------------===//
 
 // CHECK-LABEL: if ([&]() {
@@ -138,6 +173,8 @@ def ADD : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2),
 // CHECK-NEXT:      return false;
 // CHECK-NEXT:    if (!MI0.getOperand(1).isReg())
 // CHECK-NEXT:      return false;
+// CHECK-NEXT:    if (TRI.isPhysicalRegister(MI0.getOperand(1).getReg()))
+// CHECK-NEXT:      return false;
 // CHECK-NEXT:    MachineInstr &MI1 = *MRI.getVRegDef(MI0.getOperand(1).getReg());
 // CHECK-NEXT:    if (MI1.getNumOperands() < 3)
 // CHECK-NEXT:      return false;
@@ -180,6 +217,8 @@ def ADD : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2),
 // CHECK-NEXT:      return false;
 // CHECK-NEXT:    if (!MI0.getOperand(2).isReg())
 // CHECK-NEXT:      return false;
+// CHECK-NEXT:    if (TRI.isPhysicalRegister(MI0.getOperand(2).getReg()))
+// CHECK-NEXT:      return false;
 // CHECK-NEXT:    MachineInstr &MI1 = *MRI.getVRegDef(MI0.getOperand(2).getReg());
 // CHECK-NEXT:    if (MI1.getNumOperands() < 3)
 // CHECK-NEXT:      return false;
@@ -387,6 +426,42 @@ def XOR : I<(outs GPR32:$dst), (ins Z:$src2, GPR32:$src1),
 def XORlike : I<(outs GPR32:$dst), (ins m1Z:$src2, GPR32:$src1),
                 [(set GPR32:$dst, (xor GPR32:$src1, -4))]>;
 
+//===- Test a simple pattern with multiple operands with defaults. --------===//
+//
+
+// CHECK-LABEL: if ([&]() {
+// CHECK-NEXT:    MachineInstr &MI0 = I;
+// CHECK-NEXT:    if (MI0.getNumOperands() < 3)
+// CHECK-NEXT:      return false;
+// CHECK-NEXT:    if ((MI0.getOpcode() == TargetOpcode::G_XOR) &&
+// CHECK-NEXT:        ((/* dst */ (MRI.getType(MI0.getOperand(0).getReg()) == (LLT::scalar(32))) &&
+// CHECK-NEXT:         ((&RBI.getRegBankFromRegClass(MyTarget::GPR32RegClass) == RBI.getRegBank(MI0.getOperand(0).getReg(), MRI, TRI))))) &&
+// CHECK-NEXT:        ((/* src1 */ (MRI.getType(MI0.getOperand(1).getReg()) == (LLT::scalar(32))) &&
+// CHECK-NEXT:         ((&RBI.getRegBankFromRegClass(MyTarget::GPR32RegClass) == RBI.getRegBank(MI0.getOperand(1).getReg(), MRI, TRI))))) &&
+// CHECK-NEXT:        ((/* Operand 2 */ (MRI.getType(MI0.getOperand(2).getReg()) == (LLT::scalar(32))) &&
+// CHECK-NEXT:        (isOperandImmEqual(MI0.getOperand(2), -5, MRI))))) {
+// CHECK-NEXT:      // (xor:i32 GPR32:i32:$src1, -5:i32) => (XORManyDefaults:i32 GPR32:i32:$src1)
+// CHECK-NEXT:      MachineInstrBuilder MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(MyTarget::XORManyDefaults));
+// CHECK-NEXT:      MIB.add(MI0.getOperand(0)/*dst*/);
+// CHECK-NEXT:      MIB.addImm(-1);
+// CHECK-NEXT:      MIB.addReg(MyTarget::R0);
+// CHECK-NEXT:      MIB.addReg(MyTarget::R0);
+// CHECK-NEXT:      MIB.add(MI0.getOperand(1)/*src1*/);
+// CHECK-NEXT:      for (const auto *FromMI : {&MI0, })
+// CHECK-NEXT:        for (const auto &MMO : FromMI->memoperands())
+// CHECK-NEXT:          MIB.addMemOperand(MMO);
+// CHECK-NEXT:      I.eraseFromParent();
+// CHECK-NEXT:      MachineInstr &NewI = *MIB;
+// CHECK-NEXT:      constrainSelectedInstRegOperands(NewI, TII, TRI, RBI);
+// CHECK-NEXT:      return true;
+// CHECK-NEXT:    }
+// CHECK-NEXT:    return false;
+// CHECK-NEXT:  }()) { return true; }
+
+// The -5 is just to distinguish it from the other cases.
+def XORManyDefaults : I<(outs GPR32:$dst), (ins m1Z:$src3, Z:$src2, GPR32:$src1),
+                        [(set GPR32:$dst, (xor GPR32:$src1, -5))]>;
+
 //===- Test a simple pattern with constant immediate operands. ------------===//
 //
 // This must precede the 3-register variants because constant immediates have
diff --git a/test/Transforms/Coroutines/coro-catchswitch.ll b/test/Transforms/Coroutines/coro-catchswitch.ll
new file mode 100644
index 000000000000..dd06f1280cae
--- /dev/null
+++ b/test/Transforms/Coroutines/coro-catchswitch.ll
@@ -0,0 +1,88 @@
+; Verifies that we can insert the spill for a PHI preceding the catchswitch
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+; CHECK-LABEL: define void @f(
+define void @f(i1 %cond) "coroutine.presplit"="1" personality i32 0 {
+entry:
+  %id = call token @llvm.coro.id(i32 8, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br i1 %cond, label %if.else, label %if.then
+
+if.then:
+  invoke void @may_throw1()
+          to label %coro.ret unwind label %catch.dispatch
+
+if.else:
+  invoke void @may_throw2()
+          to label %coro.ret unwind label %catch.dispatch
+
+catch.dispatch:                                   ; preds = %if.else, %if.then
+  %val = phi i32 [ 1, %if.then ], [ 2, %if.else ]
+  %switch = catchswitch within none [label %catch] unwind label %cleanuppad
+
+; Verifies that we split out the PHI into a separate block and added a
+; cleanuppad, a spill, and a cleanupret unwinding into the catchswitch.
+
+; CHECK: catch.dispatch:
+; CHECK:  %val = phi i32 [ 2, %if.else ], [ 1, %if.then ]
+; CHECK:  %[[Pad:.+]] = cleanuppad within none []
+; CHECK:  %val.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK:  store i32 %val, i32* %val.spill.addr
+; CHECK:  cleanupret from %[[Pad]] unwind label %[[Switch:.+]]
+
+; CHECK: [[Switch]]:
+; CHECK: %switch = catchswitch within none [label %catch] unwind to caller
+
+catch:                                            ; preds = %catch.dispatch
+  %pad = catchpad within %switch [i8* null, i32 64, i8* null]
+  catchret from %pad to label %suspend
+
+suspend:
+  %sp = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp, label %coro.ret [
+    i8 0, label %resume
+    i8 1, label %coro.ret
+  ]
+
+resume:                                   ; preds = %suspend
+  call void @print(i32 %val)
+  br label %coro.ret
+
+coro.ret:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+    ret void
+
+cleanuppad:
+  %cpad = cleanuppad within none []
+  cleanupret from %cpad unwind to caller
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1
+
+; Function Attrs: nounwind
+declare i1 @llvm.coro.alloc(token) #2
+
+; Function Attrs: nobuiltin
+declare i32 @llvm.coro.size.i32() #4
+declare i8* @llvm.coro.begin(token, i8* writeonly) #2
+declare token @llvm.coro.save(i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+
+declare void @may_throw1()
+declare void @may_throw2()
+declare void @print(i32)
+declare noalias i8* @malloc(i32)
+declare void @free(i8*)
+
+declare i1 @llvm.coro.end(i8*, i1) #2
+
+; Function Attrs: nobuiltin nounwind
+
+; Function Attrs: argmemonly nounwind readonly
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1
diff --git a/test/Transforms/Inline/inline-hot-callee.ll b/test/Transforms/Inline/inline-hot-callee.ll
index da6e52343b2d..dad57440063b 100644
--- a/test/Transforms/Inline/inline-hot-callee.ll
+++ b/test/Transforms/Inline/inline-hot-callee.ll
@@ -1,10 +1,10 @@
 ; RUN: opt < %s -inline -inline-threshold=0 -inlinehint-threshold=100 -S | FileCheck %s
-; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=0 -inlinehint-threshold=100 -S | FileCheck %s
 
-; This tests that a hot callee gets the (higher) inlinehint-threshold even without
-; inline hints and gets inlined because the cost is less than inlinehint-threshold.
-; A cold callee with identical body does not get inlined because cost exceeds the
-; inline-threshold
+; This tests that a hot callee gets the (higher) inlinehint-threshold even
+; without inline hints and gets inlined because the cost is less than
+; inlinehint-threshold. A cold callee with identical body does not get inlined
+; because cost exceeds the inline-threshold. This test is relevant only when the
+; old pass manager is used.
 
 define i32 @callee1(i32 %x) !prof !21 {
   %x1 = add i32 %x, 1
diff --git a/test/Transforms/InstCombine/canonicalize_branch.ll b/test/Transforms/InstCombine/canonicalize_branch.ll
index 29fd51a39ab4..401490879e92 100644
--- a/test/Transforms/InstCombine/canonicalize_branch.ll
+++ b/test/Transforms/InstCombine/canonicalize_branch.ll
@@ -1,69 +1,500 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 ; Test an already canonical branch to make sure we don't flip those.
-define i32 @test0(i32 %X, i32 %Y) {
-        %C = icmp eq i32 %X, %Y
-        br i1 %C, label %T, label %F, !prof !0
-
-; CHECK-LABEL: @test0(
-; CHECK: %C = icmp eq i32 %X, %Y
-; CHECK: br i1 %C, label %T, label %F
-
+define i32 @eq(i32 %X, i32 %Y) {
+; CHECK-LABEL: @eq(
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !0
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = icmp eq i32 %X, %Y
+  br i1 %C, label %T, label %F, !prof !0
 T:
-        ret i32 12
+  ret i32 12
 F:
-        ret i32 123
+  ret i32 123
 }
 
-define i32 @test1(i32 %X, i32 %Y) {
-        %C = icmp ne i32 %X, %Y
-        br i1 %C, label %T, label %F, !prof !1
-
-; CHECK-LABEL: @test1(
-; CHECK: %C = icmp eq i32 %X, %Y
-; CHECK: br i1 %C, label %F, label %T
-
+define i32 @ne(i32 %X, i32 %Y) {
+; CHECK-LABEL: @ne(
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !1
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = icmp ne i32 %X, %Y
+  br i1 %C, label %T, label %F, !prof !1
 T:
-        ret i32 12
+  ret i32 12
 F:
-        ret i32 123
+  ret i32 123
 }
 
-define i32 @test2(i32 %X, i32 %Y) {
-        %C = icmp ule i32 %X, %Y
-        br i1 %C, label %T, label %F, !prof !2
-
-; CHECK-LABEL: @test2(
-; CHECK: %C = icmp ugt i32 %X, %Y
-; CHECK: br i1 %C, label %F, label %T
-
+define i32 @ugt(i32 %X, i32 %Y) {
+; CHECK-LABEL: @ugt(
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !2
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = icmp ugt i32 %X, %Y
+  br i1 %C, label %T, label %F, !prof !2
 T:
-        ret i32 12
+  ret i32 12
 F:
-        ret i32 123
+  ret i32 123
 }
 
-define i32 @test3(i32 %X, i32 %Y) {
-        %C = icmp uge i32 %X, %Y
-        br i1 %C, label %T, label %F, !prof !3
-
-; CHECK-LABEL: @test3(
-; CHECK: %C = icmp ult i32 %X, %Y
-; CHECK: br i1 %C, label %F, label %T
-
+define i32 @uge(i32 %X, i32 %Y) {
+; CHECK-LABEL: @uge(
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !3
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = icmp uge i32 %X, %Y
+  br i1 %C, label %T, label %F, !prof !3
 T:
-        ret i32 12
+  ret i32 12
 F:
-        ret i32 123
+  ret i32 123
 }
 
-!0 = !{!"branch_weights", i32 1, i32 2}
-!1 = !{!"branch_weights", i32 3, i32 4}
-!2 = !{!"branch_weights", i32 5, i32 6}
-!3 = !{!"branch_weights", i32 7, i32 8}
-; Base case shouldn't change.
-; CHECK: !0 = {{.*}} i32 1, i32 2}
+define i32 @ult(i32 %X, i32 %Y) {
+; CHECK-LABEL: @ult(
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !4
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = icmp ult i32 %X, %Y
+  br i1 %C, label %T, label %F, !prof !4
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @ule(i32 %X, i32 %Y) {
+; CHECK-LABEL: @ule(
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !5
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = icmp ule i32 %X, %Y
+  br i1 %C, label %T, label %F, !prof !5
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @sgt(i32 %X, i32 %Y) {
+; CHECK-LABEL: @sgt(
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !6
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = icmp sgt i32 %X, %Y
+  br i1 %C, label %T, label %F, !prof !6
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @sge(i32 %X, i32 %Y) {
+; CHECK-LABEL: @sge(
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !7
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = icmp sge i32 %X, %Y
+  br i1 %C, label %T, label %F, !prof !7
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @slt(i32 %X, i32 %Y) {
+; CHECK-LABEL: @slt(
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !8
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = icmp slt i32 %X, %Y
+  br i1 %C, label %T, label %F, !prof !8
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @sle(i32 %X, i32 %Y) {
+; CHECK-LABEL: @sle(
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !9
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = icmp sle i32 %X, %Y
+  br i1 %C, label %T, label %F, !prof !9
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_false(float %X, float %Y) {
+; CHECK-LABEL: @f_false(
+; CHECK-NEXT:    br i1 false, label [[T:%.*]], label [[F:%.*]], !prof !10
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp false float %X, %Y
+  br i1 %C, label %T, label %F, !prof !10
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_oeq(float %X, float %Y) {
+; CHECK-LABEL: @f_oeq(
+; CHECK-NEXT:    [[C:%.*]] = fcmp oeq float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !11
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp oeq float %X, %Y
+  br i1 %C, label %T, label %F, !prof !11
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_ogt(float %X, float %Y) {
+; CHECK-LABEL: @f_ogt(
+; CHECK-NEXT:    [[C:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !12
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp ogt float %X, %Y
+  br i1 %C, label %T, label %F, !prof !12
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_oge(float %X, float %Y) {
+; CHECK-LABEL: @f_oge(
+; CHECK-NEXT:    [[C:%.*]] = fcmp ult float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !13
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp oge float %X, %Y
+  br i1 %C, label %T, label %F, !prof !13
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_olt(float %X, float %Y) {
+; CHECK-LABEL: @f_olt(
+; CHECK-NEXT:    [[C:%.*]] = fcmp olt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !14
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp olt float %X, %Y
+  br i1 %C, label %T, label %F, !prof !14
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_ole(float %X, float %Y) {
+; CHECK-LABEL: @f_ole(
+; CHECK-NEXT:    [[C:%.*]] = fcmp ugt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !15
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp ole float %X, %Y
+  br i1 %C, label %T, label %F, !prof !15
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_one(float %X, float %Y) {
+; CHECK-LABEL: @f_one(
+; CHECK-NEXT:    [[C:%.*]] = fcmp ueq float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !16
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp one float %X, %Y
+  br i1 %C, label %T, label %F, !prof !16
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_ord(float %X, float %Y) {
+; CHECK-LABEL: @f_ord(
+; CHECK-NEXT:    [[C:%.*]] = fcmp ord float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !17
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp ord float %X, %Y
+  br i1 %C, label %T, label %F, !prof !17
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_uno(float %X, float %Y) {
+; CHECK-LABEL: @f_uno(
+; CHECK-NEXT:    [[C:%.*]] = fcmp uno float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !18
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp uno float %X, %Y
+  br i1 %C, label %T, label %F, !prof !18
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_ueq(float %X, float %Y) {
+; CHECK-LABEL: @f_ueq(
+; CHECK-NEXT:    [[C:%.*]] = fcmp ueq float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !19
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp ueq float %X, %Y
+  br i1 %C, label %T, label %F, !prof !19
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_ugt(float %X, float %Y) {
+; CHECK-LABEL: @f_ugt(
+; CHECK-NEXT:    [[C:%.*]] = fcmp ugt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !20
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp ugt float %X, %Y
+  br i1 %C, label %T, label %F, !prof !20
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_uge(float %X, float %Y) {
+; CHECK-LABEL: @f_uge(
+; CHECK-NEXT:    [[C:%.*]] = fcmp uge float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !21
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp uge float %X, %Y
+  br i1 %C, label %T, label %F, !prof !21
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_ult(float %X, float %Y) {
+; CHECK-LABEL: @f_ult(
+; CHECK-NEXT:    [[C:%.*]] = fcmp ult float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !22
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp ult float %X, %Y
+  br i1 %C, label %T, label %F, !prof !22
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_ule(float %X, float %Y) {
+; CHECK-LABEL: @f_ule(
+; CHECK-NEXT:    [[C:%.*]] = fcmp ule float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !23
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp ule float %X, %Y
+  br i1 %C, label %T, label %F, !prof !23
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_une(float %X, float %Y) {
+; CHECK-LABEL: @f_une(
+; CHECK-NEXT:    [[C:%.*]] = fcmp une float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !24
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp une float %X, %Y
+  br i1 %C, label %T, label %F, !prof !24
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+define i32 @f_true(float %X, float %Y) {
+; CHECK-LABEL: @f_true(
+; CHECK-NEXT:    br i1 true, label [[T:%.*]], label [[F:%.*]], !prof !25
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 12
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 123
+;
+  %C = fcmp true float %X, %Y
+  br i1 %C, label %T, label %F, !prof !25
+T:
+  ret i32 12
+F:
+  ret i32 123
+}
+
+
+!0  = !{!"branch_weights", i32 0,  i32 99}
+!1  = !{!"branch_weights", i32 1,  i32 99}
+!2  = !{!"branch_weights", i32 2,  i32 99}
+!3  = !{!"branch_weights", i32 3,  i32 99}
+!4  = !{!"branch_weights", i32 4,  i32 99}
+!5  = !{!"branch_weights", i32 5,  i32 99}
+!6  = !{!"branch_weights", i32 6,  i32 99}
+!7  = !{!"branch_weights", i32 7,  i32 99}
+!8  = !{!"branch_weights", i32 8,  i32 99}
+!9  = !{!"branch_weights", i32 9,  i32 99}
+!10 = !{!"branch_weights", i32 10, i32 99}
+!11 = !{!"branch_weights", i32 11, i32 99}
+!12 = !{!"branch_weights", i32 12, i32 99}
+!13 = !{!"branch_weights", i32 13, i32 99}
+!14 = !{!"branch_weights", i32 14, i32 99}
+!15 = !{!"branch_weights", i32 15, i32 99}
+!16 = !{!"branch_weights", i32 16, i32 99}
+!17 = !{!"branch_weights", i32 17, i32 99}
+!18 = !{!"branch_weights", i32 18, i32 99}
+!19 = !{!"branch_weights", i32 19, i32 99}
+!20 = !{!"branch_weights", i32 20, i32 99}
+!21 = !{!"branch_weights", i32 21, i32 99}
+!22 = !{!"branch_weights", i32 22, i32 99}
+!23 = !{!"branch_weights", i32 23, i32 99}
+!24 = !{!"branch_weights", i32 24, i32 99}
+!25 = !{!"branch_weights", i32 25, i32 99}
+
 ; Ensure that the branch metadata is reversed to match the reversals above.
-; CHECK: !1 = {{.*}} i32 4, i32 3}
-; CHECK: !2 = {{.*}} i32 6, i32 5}
-; CHECK: !3 = {{.*}} i32 8, i32 7}
+; CHECK: !0 = {{.*}} i32 0, i32 99}
+; CHECK: !1 = {{.*}} i32 99, i32 1}
+; CHECK: !2 = {{.*}} i32 2, i32 99}
+; CHECK: !3 = {{.*}} i32 99, i32 3}
+; CHECK: !4 = {{.*}} i32 4, i32 99}
+; CHECK: !5 = {{.*}} i32 99, i32 5}
+; CHECK: !6 = {{.*}} i32 6, i32 99}
+; CHECK: !7 = {{.*}} i32 99, i32 7}
+; CHECK: !8 = {{.*}} i32 8, i32 99}
+; CHECK: !9 = {{.*}} i32 99, i32 9}
+; CHECK: !10 = {{.*}} i32 10, i32 99}
+; CHECK: !11 = {{.*}} i32 11, i32 99}
+; CHECK: !12 = {{.*}} i32 12, i32 99}
+; CHECK: !13 = {{.*}} i32 99, i32 13}
+; CHECK: !14 = {{.*}} i32 14, i32 99}
+; CHECK: !15 = {{.*}} i32 99, i32 15}
+; CHECK: !16 = {{.*}} i32 99, i32 16}
+; CHECK: !17 = {{.*}} i32 17, i32 99}
+; CHECK: !18 = {{.*}} i32 18, i32 99}
+; CHECK: !19 = {{.*}} i32 19, i32 99}
+; CHECK: !20 = {{.*}} i32 20, i32 99}
+; CHECK: !21 = {{.*}} i32 21, i32 99}
+; CHECK: !22 = {{.*}} i32 22, i32 99}
+; CHECK: !23 = {{.*}} i32 23, i32 99}
+; CHECK: !24 = {{.*}} i32 24, i32 99}
+; CHECK: !25 = {{.*}} i32 25, i32 99}
+
diff --git a/test/Transforms/InstCombine/debuginfo-skip.ll b/test/Transforms/InstCombine/debuginfo-skip.ll
new file mode 100644
index 000000000000..d2295e29ee46
--- /dev/null
+++ b/test/Transforms/InstCombine/debuginfo-skip.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -instcombine -debug -S -o %t 2>&1 | FileCheck %s
+; RUN: cat %t | FileCheck %s --check-prefix=CHECK-IR
+; REQUIRES: asserts
+
+; Debug output from InstCombine should not have any @llvm.dbg.* instructions visited
+; CHECK-NOT: call void @llvm.dbg.
+
+; The resulting IR should still have them
+; CHECK-IR: call void @llvm.dbg.
+
+define i32 @foo(i32 %j) #0 !dbg !7 {
+entry:
+  %j.addr = alloca i32, align 4
+  store i32 %j, i32* %j.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %j.addr, metadata !11, metadata !12), !dbg !13
+  call void @llvm.dbg.value(metadata i32 10, i64 0, metadata !16, metadata !12), !dbg !15
+  %0 = load i32, i32* %j.addr, align 4, !dbg !14
+  ret i32 %0, !dbg !15
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "a.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"PIC Level", i32 2}
+!6 = !{!"clang version 5.0.0 (trunk 302918) (llvm/trunk 302925)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !10}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DILocalVariable(name: "j", arg: 1, scope: !7, file: !1, line: 2, type: !10)
+!12 = !DIExpression()
+!13 = !DILocation(line: 2, column: 13, scope: !7)
+!14 = !DILocation(line: 5, column: 10, scope: !7)
+!15 = !DILocation(line: 5, column: 3, scope: !7)
+!16 = !DILocalVariable(name: "h", scope: !7, file: !1, line: 4, type: !10)
diff --git a/test/Transforms/InstSimplify/AndOrXor.ll b/test/Transforms/InstSimplify/AndOrXor.ll
index 427ea655fcb2..a9b4e4e5cfcc 100644
--- a/test/Transforms/InstSimplify/AndOrXor.ll
+++ b/test/Transforms/InstSimplify/AndOrXor.ll
@@ -738,8 +738,7 @@ define i32 @test54(i32 %a, i32 %b) {
 define i8 @lshr_perfect_mask(i8 %x) {
 ; CHECK-LABEL: @lshr_perfect_mask(
 ; CHECK-NEXT:    [[SH:%.*]] = lshr i8 %x, 5
-; CHECK-NEXT:    [[MASK:%.*]] = and i8 [[SH]], 7
-; CHECK-NEXT:    ret i8 [[MASK]]
+; CHECK-NEXT:    ret i8 [[SH]]
 ;
   %sh = lshr i8 %x, 5
   %mask = and i8 %sh, 7  ; 0x07
@@ -749,8 +748,7 @@ define i8 @lshr_perfect_mask(i8 %x) {
 define <2 x i8> @lshr_oversized_mask_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_oversized_mask_splat(
 ; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> %x, <i8 5, i8 5>
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i8> [[SH]], <i8 -121, i8 -121>
-; CHECK-NEXT:    ret <2 x i8> [[MASK]]
+; CHECK-NEXT:    ret <2 x i8> [[SH]]
 ;
   %sh = lshr <2 x i8> %x, <i8 5, i8 5>
   %mask = and <2 x i8> %sh, <i8 135, i8 135>  ; 0x87
@@ -771,8 +769,7 @@ define i8 @lshr_undersized_mask(i8 %x) {
 define <2 x i8> @shl_perfect_mask_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_perfect_mask_splat(
 ; CHECK-NEXT:    [[SH:%.*]] = shl <2 x i8> %x, <i8 6, i8 6>
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i8> [[SH]], <i8 -64, i8 -64>
-; CHECK-NEXT:    ret <2 x i8> [[MASK]]
+; CHECK-NEXT:    ret <2 x i8> [[SH]]
 ;
   %sh = shl <2 x i8> %x, <i8 6, i8 6>
   %mask = and <2 x i8> %sh, <i8 192, i8 192>  ; 0xC0
@@ -782,8 +779,7 @@ define <2 x i8> @shl_perfect_mask_splat(<2 x i8> %x) {
 define i8 @shl_oversized_mask(i8 %x) {
 ; CHECK-LABEL: @shl_oversized_mask(
 ; CHECK-NEXT:    [[SH:%.*]] = shl i8 %x, 6
-; CHECK-NEXT:    [[MASK:%.*]] = and i8 [[SH]], -61
-; CHECK-NEXT:    ret i8 [[MASK]]
+; CHECK-NEXT:    ret i8 [[SH]]
 ;
   %sh = shl i8 %x, 6
   %mask = and i8 %sh, 195  ; 0xC3
diff --git a/test/Transforms/LoopVectorize/AArch64/pr33053.ll b/test/Transforms/LoopVectorize/AArch64/pr33053.ll
new file mode 100644
index 000000000000..6763940bf98e
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/pr33053.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -mtriple=aarch64 -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+@b = common local_unnamed_addr global i32 0, align 4
+@a = common local_unnamed_addr global i16* null, align 8
+
+; Function Attrs: norecurse nounwind readonly
+define i32 @fn1() local_unnamed_addr #0 {
+; Ensure that we don't emit reduction intrinsics for unsupported short reductions.
+; CHECK-NOT: @llvm.experimental.vector.reduce
+entry:
+  %0 = load i32, i32* @b, align 4, !tbaa !1
+  %cmp40 = icmp sgt i32 %0, 0
+  br i1 %cmp40, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %1 = load i16*, i16** @a, align 8, !tbaa !5
+  %2 = load i32, i32* @b, align 4, !tbaa !1
+  %3 = sext i32 %2 to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %d.043 = phi i16 [ undef, %for.body.lr.ph ], [ %.sink28, %for.body ]
+  %c.042 = phi i16 [ undef, %for.body.lr.ph ], [ %c.0., %for.body ]
+  %arrayidx = getelementptr inbounds i16, i16* %1, i64 %indvars.iv
+  %4 = load i16, i16* %arrayidx, align 2, !tbaa !7
+  %cmp2 = icmp sgt i16 %c.042, %4
+  %c.0. = select i1 %cmp2, i16 %c.042, i16 %4
+  %cmp13 = icmp slt i16 %d.043, %4
+  %.sink28 = select i1 %cmp13, i16 %d.043, i16 %4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp = icmp slt i64 %indvars.iv.next, %3
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  %c.0.lcssa = phi i16 [ undef, %entry ], [ %c.0., %for.body ]
+  %d.0.lcssa = phi i16 [ undef, %entry ], [ %.sink28, %for.body ]
+  %cmp26 = icmp sgt i16 %c.0.lcssa, %d.0.lcssa
+  %conv27 = zext i1 %cmp26 to i32
+  ret i32 %conv27
+}
+
+attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+!llvm.ident = !{!0}
+
+!0 = !{!"clang"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"any pointer", !3, i64 0}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"short", !3, i64 0}
diff --git a/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll b/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
index be08a63b212c..9d9aea00e9a9 100644
--- a/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
+++ b/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
@@ -20,15 +20,7 @@ target triple = "aarch64--linux-gnu"
 ; CHECK:   add <16 x i8>
 ;
 ; CHECK: middle.block:
-; CHECK:   shufflevector <16 x i8>
-; CHECK:   add <16 x i8>
-; CHECK:   shufflevector <16 x i8>
-; CHECK:   add <16 x i8>
-; CHECK:   shufflevector <16 x i8>
-; CHECK:   add <16 x i8>
-; CHECK:   shufflevector <16 x i8>
-; CHECK:   add <16 x i8>
-; CHECK:   [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <16 x i8>
+; CHECK:   [[Rdx:%[a-zA-Z0-9.]+]] = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
 ; CHECK:   zext i8 [[Rdx]] to i32
 ;
 define i8 @reduction_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
@@ -83,13 +75,7 @@ for.body:
 ; CHECK:   add <8 x i16>
 ;
 ; CHECK: middle.block:
-; CHECK:   shufflevector <8 x i16>
-; CHECK:   add <8 x i16>
-; CHECK:   shufflevector <8 x i16>
-; CHECK:   add <8 x i16>
-; CHECK:   shufflevector <8 x i16>
-; CHECK:   add <8 x i16>
-; CHECK:   [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <8 x i16>
+; CHECK:   [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
 ; CHECK:   zext i16 [[Rdx]] to i32
 ;
 define i16 @reduction_i16_1(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %n) {
@@ -146,13 +132,7 @@ for.body:
 ; CHECK:   add <8 x i16>
 ;
 ; CHECK: middle.block:
-; CHECK:   shufflevector <8 x i16>
-; CHECK:   add <8 x i16>
-; CHECK:   shufflevector <8 x i16>
-; CHECK:   add <8 x i16>
-; CHECK:   shufflevector <8 x i16>
-; CHECK:   add <8 x i16>
-; CHECK:   [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <8 x i16>
+; CHECK:   [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
 ; CHECK:   zext i16 [[Rdx]] to i32
 ;
 define i16 @reduction_i16_2(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
diff --git a/test/Transforms/NewGVN/pr32934.ll b/test/Transforms/NewGVN/pr32934.ll
index 4bb7ea150437..c71611f782c7 100644
--- a/test/Transforms/NewGVN/pr32934.ll
+++ b/test/Transforms/NewGVN/pr32934.ll
@@ -1,4 +1,3 @@
-; REQUIRES: disabled
 ; RUN: opt -S -newgvn %s | FileCheck %s
 
 ; CHECK: define void @tinkywinky() {
diff --git a/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
index b7fa5452f251..68d6ebd27a5c 100644
--- a/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
@@ -11,14 +11,8 @@ target triple = "aarch64--linux-gnu"
 ; DEFAULT-LABEL: @PR28330(
 ; DEFAULT: %tmp17 = phi i32 [ %bin.extra, %for.body ], [ 0, %entry ]
 ; DEFAULT: %[[S0:.+]] = select <8 x i1> %1, <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
-; DEFAULT: %[[R0:.+]] = shufflevector <8 x i32> %[[S0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-; DEFAULT: %[[R1:.+]] = add <8 x i32> %[[S0]], %[[R0]]
-; DEFAULT: %[[R2:.+]] = shufflevector <8 x i32> %[[R1]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; DEFAULT: %[[R3:.+]] = add <8 x i32> %[[R1]], %[[R2]]
-; DEFAULT: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; DEFAULT: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]]
-; DEFAULT: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0
-; DEFAULT: %bin.extra = add i32 %[[R6]], %tmp17
+; DEFAULT: %[[Rdx:.+]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %[[S0]])
+; DEFAULT: %bin.extra = add i32 %[[Rdx]], %tmp17
 ;
 ; GATHER-LABEL: @PR28330(
 ; GATHER: %tmp17 = phi i32 [ %bin.extra, %for.body ], [ 0, %entry ]
@@ -38,14 +32,8 @@ target triple = "aarch64--linux-gnu"
 ; GATHER: %[[I5:.+]] = insertelement <8 x i32> %[[I4]], i32 %tmp29, i32 5
 ; GATHER: %[[I6:.+]] = insertelement <8 x i32> %[[I5]], i32 %tmp31, i32 6
 ; GATHER: %[[I7:.+]] = insertelement <8 x i32> %[[I6]], i32 %tmp33, i32 7
-; GATHER: %[[R0:.+]] = shufflevector <8 x i32> %[[I7]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-; GATHER: %[[R1:.+]] = add <8 x i32> %[[I7]], %[[R0]]
-; GATHER: %[[R2:.+]] = shufflevector <8 x i32> %[[R1]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; GATHER: %[[R3:.+]] = add <8 x i32> %[[R1]], %[[R2]]
-; GATHER: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; GATHER: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]]
-; GATHER: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0
-; GATHER: %bin.extra = add i32 %[[R6]], %tmp17
+; GATHER: %[[Rdx:.+]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %[[I7]])
+; GATHER: %bin.extra = add i32 %[[Rdx]], %tmp17
 ;
 ; MAX-COST-LABEL: @PR28330(
 ; MAX-COST-NOT: shufflevector
@@ -107,14 +95,8 @@ define void @PR32038(i32 %n) {
 ; DEFAULT-NEXT:    [[TMP28:%.*]] = add i32 [[TMP26]], undef
 ; DEFAULT-NEXT:    [[TMP30:%.*]] = add i32 [[TMP28]], undef
 ; DEFAULT-NEXT:    [[TMP32:%.*]] = add i32 [[TMP30]], undef
-; DEFAULT-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-; DEFAULT-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP2]], [[RDX_SHUF]]
-; DEFAULT-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; DEFAULT-NEXT:    [[BIN_RDX2:%.*]] = add <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
-; DEFAULT-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; DEFAULT-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
-; DEFAULT-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; DEFAULT-NEXT:    [[BIN_EXTRA]] = add i32 [[TMP3]], -5
+; DEFAULT-NEXT:    [[Rdx:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP2]])
+; DEFAULT-NEXT:    [[BIN_EXTRA]] = add i32 [[Rdx]], -5
 ; DEFAULT-NEXT:    [[TMP34:%.*]] = add i32 [[TMP32]], undef
 ; DEFAULT-NEXT:    br label [[FOR_BODY]]
 ;
@@ -162,14 +144,8 @@ define void @PR32038(i32 %n) {
 ; GATHER-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP29]], i32 5
 ; GATHER-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP31]], i32 6
 ; GATHER-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP33]], i32 7
-; GATHER-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-; GATHER-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP7]], [[RDX_SHUF]]
-; GATHER-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; GATHER-NEXT:    [[BIN_RDX2:%.*]] = add <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
-; GATHER-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; GATHER-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
-; GATHER-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; GATHER-NEXT:    [[BIN_EXTRA]] = add i32 [[TMP8]], -5
+; GATHER-NEXT:    [[Rdx:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP7]])
+; GATHER-NEXT:    [[BIN_EXTRA]] = add i32 [[Rdx]], -5
 ; GATHER-NEXT:    [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]
 ; GATHER-NEXT:    br label [[FOR_BODY]]
 ;
diff --git a/tools/llvm-pdbdump/Analyze.cpp b/tools/llvm-pdbdump/Analyze.cpp
index f7d6ec53b030..ab4477ed7bad 100644
--- a/tools/llvm-pdbdump/Analyze.cpp
+++ b/tools/llvm-pdbdump/Analyze.cpp
@@ -76,26 +76,15 @@ Error AnalysisStyle::dump() {
 
   TypeDatabase TypeDB(Tpi->getNumTypeRecords());
   TypeDatabaseVisitor DBV(TypeDB);
-  TypeDeserializer Deserializer;
   TypeVisitorCallbackPipeline Pipeline;
   HashLookupVisitor Hasher(*Tpi);
-  // Deserialize the types
-  Pipeline.addCallbackToPipeline(Deserializer);
   // Add them to the database
   Pipeline.addCallbackToPipeline(DBV);
   // Store their hash values
   Pipeline.addCallbackToPipeline(Hasher);
 
-  CVTypeVisitor Visitor(Pipeline);
-
-  bool Error = false;
-  for (auto Item : Tpi->types(&Error)) {
-    if (auto EC = Visitor.visitTypeRecord(Item))
-      return EC;
-  }
-  if (Error)
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "TPI stream contained corrupt record");
+  if (auto EC = codeview::visitTypeStream(Tpi->typeArray(), Pipeline))
+    return EC;
 
   auto &Adjusters = Tpi->getHashAdjusters();
   DenseSet<uint32_t> AdjusterSet;
diff --git a/tools/llvm-pdbdump/LLVMOutputStyle.cpp b/tools/llvm-pdbdump/LLVMOutputStyle.cpp
index e975a5220af6..c4fecb80ea5a 100644
--- a/tools/llvm-pdbdump/LLVMOutputStyle.cpp
+++ b/tools/llvm-pdbdump/LLVMOutputStyle.cpp
@@ -178,11 +178,10 @@ class C13RawVisitor : public C13DebugFragmentVisitor {
 private:
   Error dumpTypeRecord(StringRef Label, TypeDatabase &DB, TypeIndex Index) {
     CompactTypeDumpVisitor CTDV(DB, Index, &P);
-    CVTypeVisitor Visitor(CTDV);
     DictScope D(P, Label);
     if (DB.contains(Index)) {
       CVType &Type = DB.getTypeRecord(Index);
-      if (auto EC = Visitor.visitTypeRecord(Type))
+      if (auto EC = codeview::visitTypeRecord(Type, CTDV))
         return EC;
     } else {
       P.printString(
@@ -629,7 +628,6 @@ Error LLVMOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
 
   std::vector<std::unique_ptr<TypeVisitorCallbacks>> Visitors;
 
-  Visitors.push_back(make_unique<TypeDeserializer>());
   if (!StreamDB.hasValue()) {
     StreamDB.emplace(Tpi->getNumTypeRecords());
     Visitors.push_back(make_unique<TypeDatabaseVisitor>(*StreamDB));
@@ -659,8 +657,6 @@ Error LLVMOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
   for (const auto &V : Visitors)
     Pipeline.addCallbackToPipeline(*V);
 
-  CVTypeVisitor Visitor(Pipeline);
-
   if (DumpRecords || DumpRecordBytes)
     RecordScope = llvm::make_unique<ListScope>(P, "Records");
 
@@ -673,9 +669,10 @@ Error LLVMOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
     if ((DumpRecords || DumpRecordBytes) && !opts::raw::CompactRecords)
       OneRecordScope = llvm::make_unique<DictScope>(P, "");
 
-    if (auto EC = Visitor.visitTypeRecord(Type))
+    if (auto EC = codeview::visitTypeRecord(Type, Pipeline))
       return EC;
-    T.setIndex(T.getIndex() + 1);
+
+    ++T;
   }
   if (HadError)
     return make_error<RawError>(raw_error_code::corrupt_file,
@@ -730,22 +727,19 @@ Error LLVMOutputStyle::buildTypeDatabase(uint32_t SN) {
 
   DB.emplace(Tpi->getNumTypeRecords());
 
-  TypeVisitorCallbackPipeline Pipeline;
-  TypeDeserializer Deserializer;
   TypeDatabaseVisitor DBV(*DB);
-  Pipeline.addCallbackToPipeline(Deserializer);
-  Pipeline.addCallbackToPipeline(DBV);
 
   auto HashValues = Tpi->getHashValues();
-  std::unique_ptr<TpiHashVerifier> HashVerifier;
-  if (!HashValues.empty()) {
-    HashVerifier =
-        make_unique<TpiHashVerifier>(HashValues, Tpi->getNumHashBuckets());
-    Pipeline.addCallbackToPipeline(*HashVerifier);
-  }
+  if (HashValues.empty())
+    return codeview::visitTypeStream(Tpi->typeArray(), DBV);
 
-  CVTypeVisitor Visitor(Pipeline);
-  return Visitor.visitTypeStream(Tpi->types(nullptr));
+  TypeVisitorCallbackPipeline Pipeline;
+  Pipeline.addCallbackToPipeline(DBV);
+
+  TpiHashVerifier HashVerifier(HashValues, Tpi->getNumHashBuckets());
+  Pipeline.addCallbackToPipeline(HashVerifier);
+
+  return codeview::visitTypeStream(Tpi->typeArray(), Pipeline);
 }
 
 Error LLVMOutputStyle::dumpDbiStream() {
diff --git a/tools/llvm-pdbdump/PdbYaml.cpp b/tools/llvm-pdbdump/PdbYaml.cpp
index d6ba7d645459..6527bec31a77 100644
--- a/tools/llvm-pdbdump/PdbYaml.cpp
+++ b/tools/llvm-pdbdump/PdbYaml.cpp
@@ -371,16 +371,14 @@ void MappingContextTraits<PdbInlineeInfo, SerializationContext>::mapping(
 void MappingContextTraits<PdbTpiRecord, pdb::yaml::SerializationContext>::
     mapping(IO &IO, pdb::yaml::PdbTpiRecord &Obj,
             pdb::yaml::SerializationContext &Context) {
-  codeview::TypeVisitorCallbackPipeline Pipeline;
-  codeview::TypeDeserializer Deserializer;
-  codeview::TypeSerializer Serializer(Context.Allocator);
-  pdb::TpiHashUpdater Hasher;
 
   if (IO.outputting()) {
     // For PDB to Yaml, deserialize into a high level record type, then dump it.
-    Pipeline.addCallbackToPipeline(Deserializer);
-    Pipeline.addCallbackToPipeline(Context.Dumper);
+    consumeError(codeview::visitTypeRecord(Obj.Record, Context.Dumper));
   } else {
+    codeview::TypeVisitorCallbackPipeline Pipeline;
+    codeview::TypeSerializer Serializer(Context.Allocator);
+    pdb::TpiHashUpdater Hasher;
     // For Yaml to PDB, extract from the high level record type, then write it
     // to bytes.
 
@@ -391,9 +389,9 @@ void MappingContextTraits<PdbTpiRecord, pdb::yaml::SerializationContext>::
     Pipeline.addCallbackToPipeline(Context.Dumper);
     Pipeline.addCallbackToPipeline(Serializer);
     Pipeline.addCallbackToPipeline(Hasher);
+    consumeError(codeview::visitTypeRecord(Obj.Record, Pipeline,
+                                           codeview::VDS_BytesExternal));
   }
 
-  codeview::CVTypeVisitor Visitor(Pipeline);
-  consumeError(Visitor.visitTypeRecord(Obj.Record));
   Context.ActiveSerializer = nullptr;
 }
diff --git a/tools/llvm-pdbdump/YamlTypeDumper.cpp b/tools/llvm-pdbdump/YamlTypeDumper.cpp
index b4eb197e866a..3e447ca60b61 100644
--- a/tools/llvm-pdbdump/YamlTypeDumper.cpp
+++ b/tools/llvm-pdbdump/YamlTypeDumper.cpp
@@ -280,16 +280,8 @@ bool ScalarTraits<APSInt>::mustQuote(StringRef Scalar) { return false; }
 
 void MappingContextTraits<CVType, pdb::yaml::SerializationContext>::mapping(
     IO &IO, CVType &Record, pdb::yaml::SerializationContext &Context) {
-  if (IO.outputting()) {
-    codeview::TypeDeserializer Deserializer;
-
-    codeview::TypeVisitorCallbackPipeline Pipeline;
-    Pipeline.addCallbackToPipeline(Deserializer);
-    Pipeline.addCallbackToPipeline(Context.Dumper);
-
-    codeview::CVTypeVisitor Visitor(Pipeline);
-    consumeError(Visitor.visitTypeRecord(Record));
-  }
+  if (IO.outputting())
+    consumeError(codeview::visitTypeRecord(Record, Context.Dumper));
 }
 
 void MappingTraits<StringIdRecord>::mapping(IO &IO, StringIdRecord &String) {
@@ -556,26 +548,17 @@ void llvm::codeview::yaml::YamlTypeDumperCallbacks::visitKnownRecordImpl(
     // (top-level and member fields all have the exact same Yaml syntax so use
     // the same parser).
     FieldListRecordSplitter Splitter(FieldListRecords);
-    CVTypeVisitor V(Splitter);
-    consumeError(V.visitFieldListMemberStream(FieldList.Data));
-    YamlIO.mapRequired("FieldList", FieldListRecords, Context);
-  } else {
-    // If we are not outputting, then the array contains no data starting out,
-    // and is instead populated from the sequence represented by the yaml --
-    // again, using the same logic that we use for top-level records.
-    assert(Context.ActiveSerializer && "There is no active serializer!");
-    codeview::TypeVisitorCallbackPipeline Pipeline;
-    pdb::TpiHashUpdater Hasher;
-
-    // For Yaml to PDB, dump it (to fill out the record fields from the Yaml)
-    // then serialize those fields to bytes, then update their hashes.
-    Pipeline.addCallbackToPipeline(Context.Dumper);
-    Pipeline.addCallbackToPipeline(*Context.ActiveSerializer);
-    Pipeline.addCallbackToPipeline(Hasher);
-
-    codeview::CVTypeVisitor Visitor(Pipeline);
-    YamlIO.mapRequired("FieldList", FieldListRecords, Visitor);
+    consumeError(codeview::visitMemberRecordStream(FieldList.Data, Splitter));
   }
+  // Note that if we're not outputting (i.e. Yaml -> PDB) the result of this
+  // mapping gets lost, as the records are simply stored in this locally scoped
+  // vector.  What's important though is that they all share a single
+  // Serializer instance (in `Context.ActiveSerializer`), and that instance is
+  // building up a list of all the types.  The throwaway vector here is needed
+  // only to appease the YAML API, so that it treats this as a sequence and
+  // performs this mapping once for each YAML Sequence element in the input
+  // Yaml stream.
+  YamlIO.mapRequired("FieldList", FieldListRecords, Context);
 }
 
 namespace llvm {
@@ -585,29 +568,22 @@ struct MappingContextTraits<pdb::yaml::PdbTpiFieldListRecord,
                             pdb::yaml::SerializationContext> {
   static void mapping(IO &IO, pdb::yaml::PdbTpiFieldListRecord &Obj,
                       pdb::yaml::SerializationContext &Context) {
-    assert(IO.outputting());
-    codeview::TypeVisitorCallbackPipeline Pipeline;
+    if (IO.outputting())
+      consumeError(codeview::visitMemberRecord(Obj.Record, Context.Dumper));
+    else {
+      // If we are not outputting, then the array contains no data starting out,
+      // and is instead populated from the sequence represented by the yaml --
+      // again, using the same logic that we use for top-level records.
+      assert(Context.ActiveSerializer && "There is no active serializer!");
+      codeview::TypeVisitorCallbackPipeline Pipeline;
+      pdb::TpiHashUpdater Hasher;
 
-    BinaryByteStream Data(Obj.Record.Data, llvm::support::little);
-    BinaryStreamReader FieldReader(Data);
-    codeview::FieldListDeserializer Deserializer(FieldReader);
-
-    // For PDB to Yaml, deserialize into a high level record type, then dump
-    // it.
-    Pipeline.addCallbackToPipeline(Deserializer);
-    Pipeline.addCallbackToPipeline(Context.Dumper);
-
-    codeview::CVTypeVisitor Visitor(Pipeline);
-    consumeError(Visitor.visitMemberRecord(Obj.Record));
-  }
-};
-
-template <>
-struct MappingContextTraits<pdb::yaml::PdbTpiFieldListRecord,
-                            codeview::CVTypeVisitor> {
-  static void mapping(IO &IO, pdb::yaml::PdbTpiFieldListRecord &Obj,
-                      codeview::CVTypeVisitor &Visitor) {
-    consumeError(Visitor.visitMemberRecord(Obj.Record));
+      Pipeline.addCallbackToPipeline(Context.Dumper);
+      Pipeline.addCallbackToPipeline(*Context.ActiveSerializer);
+      Pipeline.addCallbackToPipeline(Hasher);
+      consumeError(
+          codeview::visitMemberRecord(Obj.Record, Pipeline, VDS_BytesExternal));
+    }
   }
 };
 }
diff --git a/unittests/ADT/BitVectorTest.cpp b/unittests/ADT/BitVectorTest.cpp
index faf362abc9d8..d6a2075ca609 100644
--- a/unittests/ADT/BitVectorTest.cpp
+++ b/unittests/ADT/BitVectorTest.cpp
@@ -182,15 +182,13 @@ TYPED_TEST(BitVectorTest, TrivialOperation) {
   EXPECT_TRUE(Vec.empty());
 }
 
-TYPED_TEST(BitVectorTest, FindOperations) {
+TYPED_TEST(BitVectorTest, SimpleFindOps) {
   // Test finding in an empty BitVector.
   TypeParam A;
   EXPECT_EQ(-1, A.find_first());
   EXPECT_EQ(-1, A.find_last());
   EXPECT_EQ(-1, A.find_first_unset());
   EXPECT_EQ(-1, A.find_last_unset());
-  EXPECT_EQ(-1, A.find_next(0));
-  EXPECT_EQ(-1, A.find_next_unset(0));
 
   // Test finding next set and unset bits in a BitVector with multiple words
   A.resize(100);
@@ -222,9 +220,10 @@ TYPED_TEST(BitVectorTest, FindOperations) {
   A.set(0, 100);
   EXPECT_EQ(100U, A.count());
   EXPECT_EQ(0, A.find_first());
-  EXPECT_EQ(99, A.find_last());
   EXPECT_EQ(-1, A.find_first_unset());
   EXPECT_EQ(-1, A.find_last_unset());
+  EXPECT_EQ(99, A.find_last());
+  EXPECT_EQ(99, A.find_next(98));
 
   A.reset(0, 100);
   EXPECT_EQ(0U, A.count());
@@ -232,6 +231,7 @@ TYPED_TEST(BitVectorTest, FindOperations) {
   EXPECT_EQ(-1, A.find_last());
   EXPECT_EQ(0, A.find_first_unset());
   EXPECT_EQ(99, A.find_last_unset());
+  EXPECT_EQ(99, A.find_next_unset(98));
 
   // Also test with a vector that is small enough to fit in 1 word.
   A.resize(20);
@@ -258,6 +258,153 @@ TYPED_TEST(BitVectorTest, FindOperations) {
   EXPECT_EQ(17, A.find_next_unset(15));
 }
 
+TEST(BitVectorTest, FindInRangeMultiWord) {
+  BitVector Vec;
+
+  Vec.resize(200);
+  Vec.set(3, 7);
+  Vec.set(24, 35);
+  Vec.set(50, 70);
+  Vec.set(150);
+  Vec.set(152);
+  Vec.set(154);
+
+  // find first
+  EXPECT_EQ(-1, Vec.find_first_in(0, 0));
+  EXPECT_EQ(-1, Vec.find_first_in(24, 24));
+  EXPECT_EQ(-1, Vec.find_first_in(7, 24));
+
+  EXPECT_EQ(3, Vec.find_first_in(0, 10));
+  EXPECT_EQ(4, Vec.find_first_in(4, 10));
+  EXPECT_EQ(150, Vec.find_first_in(100, 200));
+  EXPECT_EQ(152, Vec.find_first_in(151, 200));
+  EXPECT_EQ(154, Vec.find_first_in(153, 200));
+
+  EXPECT_EQ(-1, Vec.find_first_in(155, 200));
+  Vec.set(199);
+  EXPECT_EQ(199, Vec.find_first_in(199, 200));
+  Vec.reset(199);
+
+  // find last
+  EXPECT_EQ(-1, Vec.find_last_in(0, 0));
+  EXPECT_EQ(-1, Vec.find_last_in(24, 24));
+  EXPECT_EQ(-1, Vec.find_last_in(7, 24));
+
+  EXPECT_EQ(6, Vec.find_last_in(0, 10));
+  EXPECT_EQ(5, Vec.find_last_in(0, 6));
+  EXPECT_EQ(154, Vec.find_last_in(100, 155));
+  EXPECT_EQ(152, Vec.find_last_in(100, 154));
+  EXPECT_EQ(150, Vec.find_last_in(100, 152));
+  EXPECT_EQ(-1, Vec.find_last_in(100, 150));
+  Vec.set(199);
+  EXPECT_EQ(199, Vec.find_last_in(199, 200));
+  Vec.reset(199);
+
+  // find first unset
+  EXPECT_EQ(-1, Vec.find_first_unset_in(0, 0));
+  EXPECT_EQ(-1, Vec.find_first_unset_in(23, 23));
+  EXPECT_EQ(-1, Vec.find_first_unset_in(24, 35));
+
+  EXPECT_EQ(0, Vec.find_first_unset_in(0, 10));
+  EXPECT_EQ(1, Vec.find_first_unset_in(1, 10));
+  EXPECT_EQ(7, Vec.find_first_unset_in(5, 25));
+  EXPECT_EQ(151, Vec.find_first_unset_in(150, 200));
+  EXPECT_EQ(151, Vec.find_first_unset_in(151, 200));
+  EXPECT_EQ(153, Vec.find_first_unset_in(152, 200));
+  EXPECT_EQ(153, Vec.find_first_unset_in(153, 200));
+  EXPECT_EQ(155, Vec.find_first_unset_in(154, 200));
+  EXPECT_EQ(155, Vec.find_first_unset_in(155, 200));
+  EXPECT_EQ(199, Vec.find_first_unset_in(199, 200));
+
+  // find last unset
+  EXPECT_EQ(-1, Vec.find_last_unset_in(0, 0));
+  EXPECT_EQ(-1, Vec.find_last_unset_in(23, 23));
+  EXPECT_EQ(-1, Vec.find_last_unset_in(24, 35));
+
+  EXPECT_EQ(9, Vec.find_last_unset_in(0, 10));
+  EXPECT_EQ(8, Vec.find_last_unset_in(0, 9));
+  EXPECT_EQ(2, Vec.find_last_unset_in(0, 7));
+  EXPECT_EQ(149, Vec.find_last_unset_in(100, 151));
+  EXPECT_EQ(151, Vec.find_last_unset_in(100, 152));
+  EXPECT_EQ(151, Vec.find_last_unset_in(100, 153));
+  EXPECT_EQ(153, Vec.find_last_unset_in(100, 154));
+  EXPECT_EQ(153, Vec.find_last_unset_in(100, 155));
+  EXPECT_EQ(155, Vec.find_last_unset_in(100, 156));
+  EXPECT_EQ(199, Vec.find_last_unset_in(199, 200));
+}
+
+TEST(BitVectorTest, FindInRangeSingleWord) {
+  // When the bit vector contains only a single word, this is slightly different
+  // than when the bit vector contains multiple words, because masks are applied
+  // to the front and back of the same word.  So make sure this works.
+  BitVector Vec;
+
+  Vec.resize(25);
+  Vec.set(2, 4);
+  Vec.set(6, 9);
+  Vec.set(12, 15);
+  Vec.set(19);
+  Vec.set(21);
+  Vec.set(23);
+
+  // find first
+  EXPECT_EQ(-1, Vec.find_first_in(0, 0));
+  EXPECT_EQ(-1, Vec.find_first_in(24, 24));
+  EXPECT_EQ(-1, Vec.find_first_in(9, 12));
+
+  EXPECT_EQ(2, Vec.find_first_in(0, 10));
+  EXPECT_EQ(6, Vec.find_first_in(4, 10));
+  EXPECT_EQ(19, Vec.find_first_in(18, 25));
+  EXPECT_EQ(21, Vec.find_first_in(20, 25));
+  EXPECT_EQ(23, Vec.find_first_in(22, 25));
+  EXPECT_EQ(-1, Vec.find_first_in(24, 25));
+
+  // find last
+  EXPECT_EQ(-1, Vec.find_last_in(0, 0));
+  EXPECT_EQ(-1, Vec.find_last_in(24, 24));
+  EXPECT_EQ(-1, Vec.find_last_in(9, 12));
+
+  EXPECT_EQ(8, Vec.find_last_in(0, 10));
+  EXPECT_EQ(3, Vec.find_last_in(0, 6));
+  EXPECT_EQ(23, Vec.find_last_in(18, 25));
+  EXPECT_EQ(21, Vec.find_last_in(18, 23));
+  EXPECT_EQ(19, Vec.find_last_in(18, 21));
+  EXPECT_EQ(-1, Vec.find_last_in(18, 19));
+
+  // find first unset
+  EXPECT_EQ(-1, Vec.find_first_unset_in(0, 0));
+  EXPECT_EQ(-1, Vec.find_first_unset_in(23, 23));
+  EXPECT_EQ(-1, Vec.find_first_unset_in(6, 9));
+
+  EXPECT_EQ(0, Vec.find_first_unset_in(0, 6));
+  EXPECT_EQ(1, Vec.find_first_unset_in(1, 6));
+  EXPECT_EQ(9, Vec.find_first_unset_in(7, 13));
+  EXPECT_EQ(18, Vec.find_first_unset_in(18, 25));
+  EXPECT_EQ(20, Vec.find_first_unset_in(19, 25));
+  EXPECT_EQ(20, Vec.find_first_unset_in(20, 25));
+  EXPECT_EQ(22, Vec.find_first_unset_in(21, 25));
+  EXPECT_EQ(22, Vec.find_first_unset_in(22, 25));
+  EXPECT_EQ(24, Vec.find_first_unset_in(23, 25));
+  EXPECT_EQ(24, Vec.find_first_unset_in(24, 25));
+
+  // find last unset
+  EXPECT_EQ(-1, Vec.find_last_unset_in(0, 0));
+  EXPECT_EQ(-1, Vec.find_last_unset_in(23, 23));
+  EXPECT_EQ(-1, Vec.find_last_unset_in(6, 9));
+
+  EXPECT_EQ(5, Vec.find_last_unset_in(0, 6));
+  EXPECT_EQ(4, Vec.find_last_unset_in(0, 5));
+  EXPECT_EQ(1, Vec.find_last_unset_in(0, 4));
+  EXPECT_EQ(11, Vec.find_last_unset_in(7, 13));
+  EXPECT_EQ(24, Vec.find_last_unset_in(18, 25));
+  EXPECT_EQ(22, Vec.find_last_unset_in(18, 24));
+  EXPECT_EQ(22, Vec.find_last_unset_in(18, 23));
+  EXPECT_EQ(20, Vec.find_last_unset_in(18, 22));
+  EXPECT_EQ(20, Vec.find_last_unset_in(18, 21));
+  EXPECT_EQ(18, Vec.find_last_unset_in(18, 20));
+  EXPECT_EQ(18, Vec.find_last_unset_in(18, 19));
+}
+
 TYPED_TEST(BitVectorTest, CompoundAssignment) {
   TypeParam A;
   A.resize(10);
@@ -660,5 +807,34 @@ TYPED_TEST(BitVectorTest, EmptyVector) {
   testEmpty(E);
 }
 
+TYPED_TEST(BitVectorTest, Iterators) {
+  TypeParam Filled(10, true);
+  EXPECT_NE(Filled.set_bits_begin(), Filled.set_bits_end());
+  unsigned Counter = 0;
+  for (unsigned Bit : Filled.set_bits())
+    EXPECT_EQ(Bit, Counter++);
+
+  TypeParam Empty;
+  EXPECT_EQ(Empty.set_bits_begin(), Empty.set_bits_end());
+  for (unsigned Bit : Empty.set_bits()) {
+    (void)Bit;
+    EXPECT_TRUE(false);
+  }
+
+  TypeParam ToFill(100, false);
+  ToFill.set(0);
+  EXPECT_NE(ToFill.set_bits_begin(), ToFill.set_bits_end());
+  EXPECT_EQ(++ToFill.set_bits_begin(), ToFill.set_bits_end());
+  EXPECT_EQ(*ToFill.set_bits_begin(), 0U);
+  ToFill.reset(0);
+  EXPECT_EQ(ToFill.set_bits_begin(), ToFill.set_bits_end());
+
+  const unsigned List[] = {1, 10, 25, 99};
+  for (unsigned Num : List)
+    ToFill.set(Num);
+  unsigned i = 0;
+  for (unsigned Bit : ToFill.set_bits())
+    EXPECT_EQ(List[i++], Bit);
+}
 }
 #endif
diff --git a/unittests/Analysis/ProfileSummaryInfoTest.cpp b/unittests/Analysis/ProfileSummaryInfoTest.cpp
index 3454474f0376..c9e4fc029dc0 100644
--- a/unittests/Analysis/ProfileSummaryInfoTest.cpp
+++ b/unittests/Analysis/ProfileSummaryInfoTest.cpp
@@ -102,6 +102,9 @@ TEST_F(ProfileSummaryInfoTest, TestNoProfile) {
   Function *F = M->getFunction("f");
 
   ProfileSummaryInfo PSI = buildPSI(M.get());
+  EXPECT_FALSE(PSI.hasProfileSummary());
+  EXPECT_FALSE(PSI.hasSampleProfile());
+  EXPECT_FALSE(PSI.hasInstrumentationProfile());
   // In the absence of profiles, is{Hot|Cold}X methods should always return
   // false.
   EXPECT_FALSE(PSI.isHotCount(1000));
@@ -130,6 +133,7 @@ TEST_F(ProfileSummaryInfoTest, TestCommon) {
   Function *H = M->getFunction("h");
 
   ProfileSummaryInfo PSI = buildPSI(M.get());
+  EXPECT_TRUE(PSI.hasProfileSummary());
   EXPECT_TRUE(PSI.isHotCount(400));
   EXPECT_TRUE(PSI.isColdCount(2));
   EXPECT_FALSE(PSI.isColdCount(100));
@@ -144,6 +148,8 @@ TEST_F(ProfileSummaryInfoTest, InstrProf) {
   auto M = makeLLVMModule("InstrProf");
   Function *F = M->getFunction("f");
   ProfileSummaryInfo PSI = buildPSI(M.get());
+  EXPECT_TRUE(PSI.hasProfileSummary());
+  EXPECT_TRUE(PSI.hasInstrumentationProfile());
 
   BasicBlock &BB0 = F->getEntryBlock();
   BasicBlock *BB1 = BB0.getTerminator()->getSuccessor(0);
@@ -174,6 +180,8 @@ TEST_F(ProfileSummaryInfoTest, SampleProf) {
   auto M = makeLLVMModule("SampleProfile");
   Function *F = M->getFunction("f");
   ProfileSummaryInfo PSI = buildPSI(M.get());
+  EXPECT_TRUE(PSI.hasProfileSummary());
+  EXPECT_TRUE(PSI.hasSampleProfile());
 
   BasicBlock &BB0 = F->getEntryBlock();
   BasicBlock *BB1 = BB0.getTerminator()->getSuccessor(0);
diff --git a/unittests/DebugInfo/CodeView/RandomAccessVisitorTest.cpp b/unittests/DebugInfo/CodeView/RandomAccessVisitorTest.cpp
index fedb5978da81..9ff37e93b151 100644
--- a/unittests/DebugInfo/CodeView/RandomAccessVisitorTest.cpp
+++ b/unittests/DebugInfo/CodeView/RandomAccessVisitorTest.cpp
@@ -12,6 +12,7 @@
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/RandomAccessTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeRecordMapping.h"
 #include "llvm/DebugInfo/CodeView/TypeSerializer.h"
diff --git a/unittests/DebugInfo/PDB/TypeServerHandlerTest.cpp b/unittests/DebugInfo/PDB/TypeServerHandlerTest.cpp
index 6995e8f9dded..1a30dad7b341 100644
--- a/unittests/DebugInfo/PDB/TypeServerHandlerTest.cpp
+++ b/unittests/DebugInfo/PDB/TypeServerHandlerTest.cpp
@@ -126,8 +126,8 @@ TEST_F(TypeServerHandlerTest, VisitRecordNoTypeServer) {
 
   Pipeline.addCallbackToPipeline(C1);
   Pipeline.addCallbackToPipeline(C2);
-  CVTypeVisitor Visitor(Pipeline);
-  EXPECT_NO_ERROR(Visitor.visitTypeRecord(TypeServerRecord));
+
+  EXPECT_NO_ERROR(codeview::visitTypeRecord(TypeServerRecord, Pipeline));
 
   EXPECT_EQ(MockTypeVisitorCallbacks::State::VisitTypeEnd, C1.S);
   EXPECT_EQ(MockTypeVisitorCallbacks::State::VisitTypeEnd, C2.S);
@@ -139,16 +139,16 @@ TEST_F(TypeServerHandlerTest, VisitRecordWithTypeServerOnce) {
   MockTypeServerHandler Handler(false);
 
   MockTypeVisitorCallbacks C1;
-  CVTypeVisitor Visitor(C1);
-  Visitor.addTypeServerHandler(Handler);
 
   // Our mock server returns true the first time.
-  EXPECT_NO_ERROR(Visitor.visitTypeRecord(TypeServerRecord));
+  EXPECT_NO_ERROR(codeview::visitTypeRecord(
+      TypeServerRecord, C1, codeview::VDS_BytesExternal, &Handler));
   EXPECT_TRUE(Handler.Handled);
   EXPECT_EQ(MockTypeVisitorCallbacks::State::Ready, C1.S);
 
   // And false the second time.
-  EXPECT_NO_ERROR(Visitor.visitTypeRecord(TypeServerRecord));
+  EXPECT_NO_ERROR(codeview::visitTypeRecord(
+      TypeServerRecord, C1, codeview::VDS_BytesExternal, &Handler));
   EXPECT_TRUE(Handler.Handled);
   EXPECT_EQ(MockTypeVisitorCallbacks::State::VisitTypeEnd, C1.S);
 }
@@ -160,14 +160,14 @@ TEST_F(TypeServerHandlerTest, VisitRecordWithTypeServerAlways) {
   MockTypeServerHandler Handler(true);
 
   MockTypeVisitorCallbacks C1;
-  CVTypeVisitor Visitor(C1);
-  Visitor.addTypeServerHandler(Handler);
 
-  EXPECT_NO_ERROR(Visitor.visitTypeRecord(TypeServerRecord));
+  EXPECT_NO_ERROR(codeview::visitTypeRecord(
+      TypeServerRecord, C1, codeview::VDS_BytesExternal, &Handler));
   EXPECT_TRUE(Handler.Handled);
   EXPECT_EQ(MockTypeVisitorCallbacks::State::Ready, C1.S);
 
-  EXPECT_NO_ERROR(Visitor.visitTypeRecord(TypeServerRecord));
+  EXPECT_NO_ERROR(codeview::visitTypeRecord(
+      TypeServerRecord, C1, codeview::VDS_BytesExternal, &Handler));
   EXPECT_TRUE(Handler.Handled);
   EXPECT_EQ(MockTypeVisitorCallbacks::State::Ready, C1.S);
 }
diff --git a/unittests/Support/BinaryStreamTest.cpp b/unittests/Support/BinaryStreamTest.cpp
index 41567dad6226..ec3b0effc9e9 100644
--- a/unittests/Support/BinaryStreamTest.cpp
+++ b/unittests/Support/BinaryStreamTest.cpp
@@ -16,6 +16,7 @@
 #include "gtest/gtest.h"
 
 #include <unordered_map>
+#include <utility>
 
 using namespace llvm;
 using namespace llvm::support;
@@ -117,7 +118,7 @@ class BrokenStream : public WritableBinaryStream {
 
   // Buffer is organized like this:
   // -------------------------------------------------
-  // | N/2 | N/2+1 | ... | N-1 | 0 | 1 | ... | N-2-1 |
+  // | N/2 | N/2+1 | ... | N-1 | 0 | 1 | ... | N/2-1 |
   // -------------------------------------------------
   // So reads from the beginning actually come from the middle.
   MutableArrayRef<uint8_t> Data;
@@ -348,6 +349,30 @@ TEST_F(BinaryStreamTest, FixedStreamArray) {
   }
 }
 
+// Ensure FixedStreamArrayIterator::operator-> works.
+// Added for coverage of r302257.
+TEST_F(BinaryStreamTest, FixedStreamArrayIteratorArrow) {
+  std::vector<std::pair<uint32_t, uint32_t>> Pairs = {{867, 5309}, {555, 1212}};
+  ArrayRef<uint8_t> PairBytes(reinterpret_cast<uint8_t *>(Pairs.data()),
+    Pairs.size() * sizeof(Pairs[0]));
+
+  initializeInput(PairBytes, alignof(uint32_t));
+
+  for (auto &Stream : Streams) {
+    ASSERT_EQ(InputData.size(), Stream.Input->getLength());
+
+    const FixedStreamArray<std::pair<uint32_t, uint32_t>> Array(*Stream.Input);
+    auto Iter = Array.begin();
+    ASSERT_EQ(Pairs[0].first, Iter->first);
+    ASSERT_EQ(Pairs[0].second, Iter->second);
+    ++Iter;
+    ASSERT_EQ(Pairs[1].first, Iter->first);
+    ASSERT_EQ(Pairs[1].second, Iter->second);
+    ++Iter;
+    ASSERT_EQ(Array.end(), Iter);
+  }
+}
+
 // Test that VarStreamArray works correctly.
 TEST_F(BinaryStreamTest, VarStreamArray) {
   StringLiteral Strings("1. Test2. Longer Test3. Really Long Test4. Super "
@@ -686,7 +711,7 @@ TEST_F(BinaryStreamTest, BinaryItemStream) {
   std::vector<Foo> Foos = {{1, 1.0}, {2, 2.0}, {3, 3.0}};
   BumpPtrAllocator Allocator;
   for (const auto &F : Foos) {
-    uint8_t *Ptr = static_cast<uint8_t *>(Allocator.Allocate(sizeof(Foo), 
+    uint8_t *Ptr = static_cast<uint8_t *>(Allocator.Allocate(sizeof(Foo),
                                                              alignof(Foo)));
     MutableArrayRef<uint8_t> Buffer(Ptr, sizeof(Foo));
     MutableBinaryByteStream Stream(Buffer, llvm::support::big);
diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt
index f8d3c1c9a8c7..e7f2f515d76a 100644
--- a/unittests/Support/CMakeLists.txt
+++ b/unittests/Support/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_unittest(SupportTests
   BlockFrequencyTest.cpp
   BranchProbabilityTest.cpp
   CachePruningTest.cpp
+  CrashRecoveryTest.cpp
   Casting.cpp
   Chrono.cpp
   CommandLineTest.cpp
diff --git a/unittests/Support/CrashRecoveryTest.cpp b/unittests/Support/CrashRecoveryTest.cpp
new file mode 100644
index 000000000000..dbb0db576793
--- /dev/null
+++ b/unittests/Support/CrashRecoveryTest.cpp
@@ -0,0 +1,83 @@
+//===- llvm/unittest/Support/CrashRecoveryTest.cpp ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CrashRecoveryContext.h"
+#include "llvm/Support/Compiler.h"
+#include "gtest/gtest.h"
+
+#ifdef LLVM_ON_WIN32
+#define WIN32_LEAN_AND_MEAN
+#define NOGDI
+#include <windows.h>
+#endif
+
+using namespace llvm;
+using namespace llvm::sys;
+
+static int GlobalInt = 0; // Counter observed by the tests below.
+static void nullDeref() { *(volatile int *)nullptr = 0; } // Null store.
+static void incrementGlobal() { ++GlobalInt; } // Non-crashing body.
+static void llvmTrap() { LLVM_BUILTIN_TRAP; } // Expands LLVM_BUILTIN_TRAP.
+
+TEST(CrashRecoveryTest, Basic) {
+  llvm::CrashRecoveryContext::Enable();
+  GlobalInt = 0;
+  EXPECT_TRUE(CrashRecoveryContext().RunSafely(incrementGlobal)); // no crash
+  EXPECT_EQ(1, GlobalInt); // the callback really ran
+  EXPECT_FALSE(CrashRecoveryContext().RunSafely(nullDeref)); // null store
+  EXPECT_FALSE(CrashRecoveryContext().RunSafely(llvmTrap));  // trap macro
+}
+
+struct IncrementGlobalCleanup : CrashRecoveryContextCleanup { // Counts runs.
+  explicit IncrementGlobalCleanup(CrashRecoveryContext *CRC)
+      : CrashRecoveryContextCleanup(CRC) {}
+  void recoverResources() override { ++GlobalInt; } // One bump per cleanup.
+};
+
+static void noop() {} // Empty body: nothing in it can crash.
+
+TEST(CrashRecoveryTest, Cleanup) {
+  llvm::CrashRecoveryContext::Enable();
+  GlobalInt = 0;
+  {
+    CrashRecoveryContext CRC;
+    CRC.registerCleanup(new IncrementGlobalCleanup(&CRC));
+    EXPECT_TRUE(CRC.RunSafely(noop)); // Case 1: the body completes normally.
+  } // run cleanups
+  EXPECT_EQ(1, GlobalInt); // Only the cleanup increments GlobalInt here.
+
+  GlobalInt = 0;
+  {
+    CrashRecoveryContext CRC;
+    CRC.registerCleanup(new IncrementGlobalCleanup(&CRC));
+    EXPECT_FALSE(CRC.RunSafely(nullDeref)); // Case 2: the body crashes.
+  } // run cleanups
+  EXPECT_EQ(1, GlobalInt); // The cleanup must have fired exactly once.
+}
+
+#ifdef LLVM_ON_WIN32
+static void raiseIt() { // Raises exception code 123 with no arguments.
+  RaiseException(123, EXCEPTION_NONCONTINUABLE, 0, nullptr);
+}
+
+TEST(CrashRecoveryTest, RaiseException) {
+  llvm::CrashRecoveryContext::Enable();
+  EXPECT_FALSE(CrashRecoveryContext().RunSafely(raiseIt)); // counts as crash
+}
+
+static void outputString() { // Sends one line to the debugger output channel.
+  OutputDebugStringA("output for debugger\n");
+}
+
+TEST(CrashRecoveryTest, CallOutputDebugString) {
+  llvm::CrashRecoveryContext::Enable();
+  EXPECT_TRUE(CrashRecoveryContext().RunSafely(outputString)); // not a crash
+}
+
+#endif
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index a5c2ea6c7aca..264175ae9677 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -763,7 +763,8 @@ class AsmMatcherInfo {
 
 } // end anonymous namespace
 
-void MatchableInfo::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MatchableInfo::dump() const {
   errs() << TheDef->getName() << " -- " << "flattened:\"" << AsmString <<"\"\n";
 
   for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
@@ -772,6 +773,7 @@ void MatchableInfo::dump() const {
     errs() << '\"' << Op.Token << "\"\n";
   }
 }
+#endif
 
 static std::pair<StringRef, StringRef>
 parseTwoOperandConstraint(StringRef S, ArrayRef<SMLoc> Loc) {
diff --git a/utils/TableGen/GlobalISelEmitter.cpp b/utils/TableGen/GlobalISelEmitter.cpp
index 65a1ea2f0f21..dc022fe1ceb2 100644
--- a/utils/TableGen/GlobalISelEmitter.cpp
+++ b/utils/TableGen/GlobalISelEmitter.cpp
@@ -775,6 +775,8 @@ class InstructionOperandMatcher : public OperandPredicateMatcher {
   void emitCxxCaptureStmts(raw_ostream &OS, RuleMatcher &Rule,
                            StringRef OperandExpr) const override {
     OS << "if (!" << OperandExpr + ".isReg())\n"
+       << "  return false;\n"
+       << "if (TRI.isPhysicalRegister(" << OperandExpr + ".getReg()))\n"
        << "  return false;\n";
     std::string InsnVarName = Rule.defineInsnVar(
         OS, *InsnMatcher,
@@ -1242,6 +1244,8 @@ class GlobalISelEmitter {
   Error importExplicitUseRenderer(BuildMIAction &DstMIBuilder,
                                   TreePatternNode *DstChild,
                                   const InstructionMatcher &InsnMatcher) const;
+  Error importDefaultOperandRenderers(BuildMIAction &DstMIBuilder,
+                                      DagInit *DefaultOps) const;
   Error
   importImplicitDefRenderers(BuildMIAction &DstMIBuilder,
                              const std::vector<Record *> &ImplicitDefs) const;
@@ -1321,8 +1325,27 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
 
   // Match the used operands (i.e. the children of the operator).
   for (unsigned i = 0, e = Src->getNumChildren(); i != e; ++i) {
-    if (auto Error = importChildMatcher(InsnMatcher, Src->getChild(i), OpIdx++,
-                                        TempOpIdx))
+    TreePatternNode *SrcChild = Src->getChild(i);
+
+    // For G_INTRINSIC, the operand immediately following the defs is an
+    // intrinsic ID, which must be given as an integer leaf in the pattern.
+    if (SrcGI.TheDef->getName() == "G_INTRINSIC" && i == 0) {
+      if (!SrcChild->isLeaf())
+        return failedImport("Expected IntInit containing intrinsic ID");
+
+      if (IntInit *SrcChildIntInit =
+              dyn_cast<IntInit>(SrcChild->getLeafValue())) {
+        OperandMatcher &OM =
+            InsnMatcher.addOperand(OpIdx++, SrcChild->getName(), TempOpIdx);
+        OM.addPredicate<IntOperandMatcher>(SrcChildIntInit->getValue());
+        continue;
+      }
+
+      return failedImport("Expected IntInit containing intrinsic ID");
+    }
+
+    if (auto Error =
+            importChildMatcher(InsnMatcher, SrcChild, OpIdx++, TempOpIdx))
       return std::move(Error);
   }
 
@@ -1357,7 +1380,7 @@ Error GlobalISelEmitter::importChildMatcher(InstructionMatcher &InsnMatcher,
 
   auto OpTyOrNone = MVTToLLT(ChildTypes.front().getConcrete());
   if (!OpTyOrNone)
-    return failedImport("Src operand has an unsupported type");
+    return failedImport("Src operand has an unsupported type (" + to_string(*SrcChild) + ")");
   OM.addPredicate<LLTOperandMatcher>(*OpTyOrNone);
 
   // Check for nested instructions.
@@ -1509,59 +1532,23 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
     DstMIBuilder.addRenderer<CopyRenderer>(InsnMatcher, DstIOperand.Name);
   }
 
-  // Figure out which operands need defaults inserted. Operands that subclass
-  // OperandWithDefaultOps are considered from left to right until we have
-  // enough operands to render the instruction.
-  SmallSet<unsigned, 2> DefaultOperands;
-  unsigned DstINumUses = DstI.Operands.size() - DstI.Operands.NumDefs;
-  unsigned NumDefaultOperands = 0;
-  for (unsigned I = 0; I < DstINumUses &&
-                       DstINumUses > Dst->getNumChildren() + NumDefaultOperands;
-       ++I) {
-    const auto &DstIOperand = DstI.Operands[DstI.Operands.NumDefs + I];
-    if (DstIOperand.Rec->isSubClassOf("OperandWithDefaultOps")) {
-      DefaultOperands.insert(I);
-      NumDefaultOperands +=
-          DstIOperand.Rec->getValueAsDag("DefaultOps")->getNumArgs();
-    }
-  }
-  if (DstINumUses > Dst->getNumChildren() + DefaultOperands.size())
-    return failedImport("Insufficient operands supplied and default ops "
-                        "couldn't make up the shortfall");
-  if (DstINumUses < Dst->getNumChildren() + DefaultOperands.size())
-    return failedImport("Too many operands supplied");
-
   // Render the explicit uses.
   unsigned Child = 0;
+  unsigned DstINumUses = DstI.Operands.size() - DstI.Operands.NumDefs;
+  unsigned NumDefaultOps = 0;
   for (unsigned I = 0; I != DstINumUses; ++I) {
-    // If we need to insert default ops here, then do so.
-    if (DefaultOperands.count(I)) {
-      const auto &DstIOperand = DstI.Operands[DstI.Operands.NumDefs + I];
+    const auto &DstIOperand = DstI.Operands[DstI.Operands.NumDefs + I];
 
+    // If the operand has default values, introduce them now.
+    // FIXME: Until we have a decent test case that dictates we should do
+    // otherwise, we're going to assume that operands with default values cannot
+    // be specified in the patterns. Therefore, adding them will not cause us to
+    // end up with too many rendered operands.
+    if (DstIOperand.Rec->isSubClassOf("OperandWithDefaultOps")) {
       DagInit *DefaultOps = DstIOperand.Rec->getValueAsDag("DefaultOps");
-      for (const auto *DefaultOp : DefaultOps->args()) {
-        // Look through ValueType operators.
-        if (const DagInit *DefaultDagOp = dyn_cast<DagInit>(DefaultOp)) {
-          if (const DefInit *DefaultDagOperator =
-                  dyn_cast<DefInit>(DefaultDagOp->getOperator())) {
-            if (DefaultDagOperator->getDef()->isSubClassOf("ValueType"))
-              DefaultOp = DefaultDagOp->getArg(0);
-          }
-        }
-
-        if (const DefInit *DefaultDefOp = dyn_cast<DefInit>(DefaultOp)) {
-          DstMIBuilder.addRenderer<AddRegisterRenderer>(DefaultDefOp->getDef());
-          continue;
-        }
-
-        if (const IntInit *DefaultIntOp = dyn_cast<IntInit>(DefaultOp)) {
-          DstMIBuilder.addRenderer<ImmRenderer>(DefaultIntOp->getValue());
-          continue;
-        }
-
-        return failedImport("Could not add default op");
-      }
-
+      if (auto Error = importDefaultOperandRenderers(DstMIBuilder, DefaultOps))
+        return std::move(Error);
+      ++NumDefaultOps;
       continue;
     }
 
@@ -1571,9 +1558,44 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
     ++Child;
   }
 
+  if (NumDefaultOps + Dst->getNumChildren() != DstINumUses)
+    return failedImport("Expected " + llvm::to_string(DstINumUses) +
+                        " used operands but found " +
+                        llvm::to_string(Dst->getNumChildren()) +
+                        " explicit ones and " + llvm::to_string(NumDefaultOps) +
+                        " default ones");
+
   return DstMIBuilder;
 }
 
+Error GlobalISelEmitter::importDefaultOperandRenderers(
+    BuildMIAction &DstMIBuilder, DagInit *DefaultOps) const {
+  // Add one renderer to DstMIBuilder for every argument of DefaultOps, or
+  // fail the import on the first argument that is neither a def nor an int.
+  for (const auto *Op : DefaultOps->args()) {
+    // Look through a DAG whose operator is a ValueType record: the operand
+    // to render is then that DAG's first argument.
+    if (const auto *TypedDag = dyn_cast<DagInit>(Op))
+      if (const auto *DagOperator = dyn_cast<DefInit>(TypedDag->getOperator()))
+        if (DagOperator->getDef()->isSubClassOf("ValueType"))
+          Op = TypedDag->getArg(0);
+
+    if (const auto *DefOp = dyn_cast<DefInit>(Op)) {
+      // DefInit operands are rendered through AddRegisterRenderer.
+      DstMIBuilder.addRenderer<AddRegisterRenderer>(DefOp->getDef());
+    } else if (const auto *IntOp = dyn_cast<IntInit>(Op)) {
+      // IntInit operands are rendered through ImmRenderer.
+      DstMIBuilder.addRenderer<ImmRenderer>(IntOp->getValue());
+    } else {
+      // Any other kind of initializer is rejected.
+      return failedImport("Could not add default op");
+    }
+  }
+
+  // Every default operand was rendered.
+  return Error::success();
+}
+
 Error GlobalISelEmitter::importImplicitDefRenderers(
     BuildMIAction &DstMIBuilder,
     const std::vector<Record *> &ImplicitDefs) const {
diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py
index 10cd7775060f..a7f407fc210c 100755
--- a/utils/lit/lit/main.py
+++ b/utils/lit/lit/main.py
@@ -282,15 +282,9 @@ def main_with_tmp(builtinParameters):
     debug_group.add_argument("--show-tests", dest="showTests",
                       help="Show all discovered tests",
                       action="store_true", default=False)
-    debug_group.add_argument("--use-process-pool", dest="executionStrategy",
-                      help="Run tests in parallel with a process pool",
-                      action="store_const", const="PROCESS_POOL")
     debug_group.add_argument("--use-processes", dest="executionStrategy",
                       help="Run tests in parallel with processes (not threads)",
                       action="store_const", const="PROCESSES")
-    debug_group.add_argument("--use-threads", dest="executionStrategy",
-                      help="Run tests in parallel with threads (not processes)",
-                      action="store_const", const="THREADS")
 
     opts = parser.parse_args()
     args = opts.test_paths
@@ -305,9 +299,6 @@ def main_with_tmp(builtinParameters):
     if opts.numThreads is None:
         opts.numThreads = lit.util.detectCPUs()
 
-    if opts.executionStrategy is None:
-        opts.executionStrategy = 'PROCESS_POOL'
-
     if opts.maxFailures == 0:
         parser.error("Setting --max-failures to 0 does not have any effect.")
 
@@ -490,8 +481,7 @@ def main_with_tmp(builtinParameters):
     startTime = time.time()
     display = TestingProgressDisplay(opts, len(run.tests), progressBar)
     try:
-        run.execute_tests(display, opts.numThreads, opts.maxTime,
-                          opts.executionStrategy)
+        run.execute_tests(display, opts.numThreads, opts.maxTime)
     except KeyboardInterrupt:
         sys.exit(2)
     display.finish()
diff --git a/utils/lit/lit/run.py b/utils/lit/lit/run.py
index 27c7a9e59f8b..aa4fdc18b877 100644
--- a/utils/lit/lit/run.py
+++ b/utils/lit/lit/run.py
@@ -13,11 +13,7 @@
 except ImportError:
     win32api = None
 
-try:
-    import multiprocessing
-except ImportError:
-    multiprocessing = None
-
+import multiprocessing
 import lit.Test
 
 def abort_now():
@@ -227,8 +223,7 @@ def __init__(self, lit_config, tests):
     def execute_test(self, test):
         return execute_test(test, self.lit_config, self.parallelism_semaphores)
 
-    def execute_tests(self, display, jobs, max_time=None,
-                      execution_strategy=None):
+    def execute_tests(self, display, jobs, max_time=None):
         """
         execute_tests(display, jobs, [max_time])
 
@@ -249,100 +244,6 @@ def execute_tests(self, display, jobs, max_time=None,
         computed. Tests which were not actually executed (for any reason) will
         be given an UNRESOLVED result.
         """
-
-        if execution_strategy == 'PROCESS_POOL':
-            self.execute_tests_with_mp_pool(display, jobs, max_time)
-            return
-        # FIXME: Standardize on the PROCESS_POOL execution strategy and remove
-        # the other two strategies.
-
-        use_processes = execution_strategy == 'PROCESSES'
-
-        # Choose the appropriate parallel execution implementation.
-        consumer = None
-        if jobs != 1 and use_processes and multiprocessing:
-            try:
-                task_impl = multiprocessing.Process
-                queue_impl = multiprocessing.Queue
-                sem_impl = multiprocessing.Semaphore
-                canceled_flag =  multiprocessing.Value('i', 0)
-                consumer = MultiprocessResultsConsumer(self, display, jobs)
-            except:
-                # multiprocessing fails to initialize with certain OpenBSD and
-                # FreeBSD Python versions: http://bugs.python.org/issue3770
-                # Unfortunately the error raised also varies by platform.
-                self.lit_config.note('failed to initialize multiprocessing')
-                consumer = None
-        if not consumer:
-            task_impl = threading.Thread
-            queue_impl = queue.Queue
-            sem_impl = threading.Semaphore
-            canceled_flag = LockedValue(0)
-            consumer = ThreadResultsConsumer(display)
-
-        self.parallelism_semaphores = {k: sem_impl(v)
-            for k, v in self.lit_config.parallelism_groups.items()}
-
-        # Create the test provider.
-        provider = TestProvider(queue_impl, canceled_flag)
-        handleFailures(provider, consumer, self.lit_config.maxFailures)
-
-        # Putting tasks into the threading or multiprocessing Queue may block,
-        # so do it in a separate thread.
-        # https://docs.python.org/2/library/multiprocessing.html
-        # e.g: On Mac OS X, we will hang if we put 2^15 elements in the queue
-        # without taking any out.
-        queuer = task_impl(target=provider.queue_tests, args=(self.tests, jobs))
-        queuer.start()
-
-        # Install a console-control signal handler on Windows.
-        if win32api is not None:
-            def console_ctrl_handler(type):
-                provider.cancel()
-                return True
-            win32api.SetConsoleCtrlHandler(console_ctrl_handler, True)
-
-        # Install a timeout handler, if requested.
-        if max_time is not None:
-            def timeout_handler():
-                provider.cancel()
-            timeout_timer = threading.Timer(max_time, timeout_handler)
-            timeout_timer.start()
-
-        # If not using multiple tasks, just run the tests directly.
-        if jobs == 1:
-            run_one_tester(self, provider, consumer)
-        else:
-            # Otherwise, execute the tests in parallel
-            self._execute_tests_in_parallel(task_impl, provider, consumer, jobs)
-
-        queuer.join()
-
-        # Cancel the timeout handler.
-        if max_time is not None:
-            timeout_timer.cancel()
-
-        # Update results for any tests which weren't run.
-        for test in self.tests:
-            if test.result is None:
-                test.setResult(lit.Test.Result(lit.Test.UNRESOLVED, '', 0.0))
-
-    def _execute_tests_in_parallel(self, task_impl, provider, consumer, jobs):
-        # Start all of the tasks.
-        tasks = [task_impl(target=run_one_tester,
-                           args=(self, provider, consumer))
-                 for i in range(jobs)]
-        for t in tasks:
-            t.start()
-
-        # Allow the consumer to handle results, if necessary.
-        consumer.handle_results()
-
-        # Wait for all the tasks to complete.
-        for t in tasks:
-            t.join()
-
-    def execute_tests_with_mp_pool(self, display, jobs, max_time=None):
         # Don't do anything if we aren't going to run any tests.
         if not self.tests or jobs == 0:
             return

From 1ce08792766261dcaa25d8215f9d1c2f70d7b7e9 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 17 May 2017 20:22:49 +0000
Subject: [PATCH 5/9] Vendor import of clang trunk r303291:
 https://llvm.org/svn/llvm-project/cfe/trunk@303291

---
 include/clang/AST/Decl.h                      |  10 -
 include/clang/AST/DeclBase.h                  |  14 ++
 include/clang/Basic/LangOptions.h             |   5 +
 include/clang/Basic/SourceManager.h           |   2 +-
 include/clang/Frontend/ASTUnit.h              |   6 -
 include/clang/Sema/Sema.h                     |   4 +-
 lib/AST/ASTDumper.cpp                         |   6 +-
 lib/AST/Decl.cpp                              |   4 +-
 lib/AST/DeclBase.cpp                          |  10 +-
 lib/AST/ODRHash.cpp                           |  80 +++++-
 lib/CodeGen/CGDebugInfo.cpp                   |   2 +-
 lib/Driver/ToolChains/MSVC.cpp                |  11 +-
 lib/Frontend/ASTUnit.cpp                      |  23 --
 lib/Headers/xmmintrin.h                       |  25 +-
 lib/Lex/Lexer.cpp                             |   4 +
 lib/Sema/Sema.cpp                             |   9 +-
 lib/Sema/SemaDecl.cpp                         |  15 ++
 lib/Sema/SemaLookup.cpp                       |  57 -----
 lib/Serialization/ASTReader.cpp               |  31 +--
 test/CodeGenCXX/debug-info-namespace.cpp      |   6 +
 test/Modules/Inputs/module.map                |   4 +
 test/Modules/Inputs/objcAtKeywordMissingEnd.h |   3 +
 test/Modules/Inputs/submodule-visibility/b.h  |   6 +-
 .../Inputs/submodule-visibility/other.h       |   9 +
 test/Modules/objc-at-keyword.m                |   7 +
 test/Modules/odr_hash.cpp                     | 231 ++++++++++++++++++
 test/Modules/submodule-visibility.cpp         |   9 +
 27 files changed, 429 insertions(+), 164 deletions(-)
 create mode 100644 test/Modules/Inputs/objcAtKeywordMissingEnd.h
 create mode 100644 test/Modules/objc-at-keyword.m

diff --git a/include/clang/AST/Decl.h b/include/clang/AST/Decl.h
index facef8e55f7a..4f8042ac9291 100644
--- a/include/clang/AST/Decl.h
+++ b/include/clang/AST/Decl.h
@@ -301,16 +301,6 @@ class NamedDecl : public Decl {
   using Decl::isModulePrivate;
   using Decl::setModulePrivate;
 
-  /// \brief Determine whether this declaration is hidden from name lookup.
-  bool isHidden() const { return Hidden; }
-
-  /// \brief Set whether this declaration is hidden from name lookup.
-  void setHidden(bool Hide) {
-    assert((!Hide || isFromASTFile() || hasLocalOwningModuleStorage()) &&
-           "declaration with no owning module can't be hidden");
-    Hidden = Hide;
-  }
-
   /// \brief Determine whether this declaration is a C++ class member.
   bool isCXXClassMember() const {
     const DeclContext *DC = getDeclContext();
diff --git a/include/clang/AST/DeclBase.h b/include/clang/AST/DeclBase.h
index 15ac11a5a777..08879b36cce5 100644
--- a/include/clang/AST/DeclBase.h
+++ b/include/clang/AST/DeclBase.h
@@ -706,6 +706,20 @@ class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) Decl {
     reinterpret_cast<Module **>(this)[-1] = M;
   }
 
+  Module *getOwningModule() const { // Imported owner if from an AST file.
+    return isFromASTFile() ? getImportedOwningModule() : getLocalOwningModule();
+  }
+
+  /// \brief Return the Hidden flag: is this declaration hidden from lookup?
+  bool isHidden() const { return Hidden; }
+
+  /// \brief Set whether this declaration is hidden from name lookup.
+  void setHidden(bool Hide) {
+    assert((!Hide || isFromASTFile() || hasLocalOwningModuleStorage()) &&
+           "declaration with no owning module can't be hidden");
+    Hidden = Hide; // Un-hiding (Hide == false) always passes the assert.
+  }
+
   unsigned getIdentifierNamespace() const {
     return IdentifierNamespace;
   }
diff --git a/include/clang/Basic/LangOptions.h b/include/clang/Basic/LangOptions.h
index 20a0e5845602..ceaedf58574f 100644
--- a/include/clang/Basic/LangOptions.h
+++ b/include/clang/Basic/LangOptions.h
@@ -166,6 +166,11 @@ class LangOptions : public LangOptionsBase {
     return getCompilingModule() != CMK_None;
   }
 
+  /// Report whether the owning module has to be tracked for declarations
+  /// that are local (as opposed to loaded from an AST file). At present this
+  /// is the case exactly when ModulesLocalVisibility is enabled.
+  bool trackLocalOwningModule() const { return ModulesLocalVisibility; }
+
   bool isSignedOverflowDefined() const {
     return getSignedOverflowBehavior() == SOB_Defined;
   }
diff --git a/include/clang/Basic/SourceManager.h b/include/clang/Basic/SourceManager.h
index c8fe2ab90c29..6960ea690b91 100644
--- a/include/clang/Basic/SourceManager.h
+++ b/include/clang/Basic/SourceManager.h
@@ -865,7 +865,7 @@ class SourceManager : public RefCountedBase<SourceManager> {
                             const FileEntry *NewFile);
 
   /// \brief Returns true if the file contents have been overridden.
-  bool isFileOverridden(const FileEntry *File) {
+  bool isFileOverridden(const FileEntry *File) const {
     if (OverriddenFilesInfo) {
       if (OverriddenFilesInfo->OverriddenFilesWithBuffer.count(File))
         return true;
diff --git a/include/clang/Frontend/ASTUnit.h b/include/clang/Frontend/ASTUnit.h
index 2a8df1b7b9ae..46395cf6e861 100644
--- a/include/clang/Frontend/ASTUnit.h
+++ b/include/clang/Frontend/ASTUnit.h
@@ -419,7 +419,6 @@ class ASTUnit : public ModuleLoader {
   
   explicit ASTUnit(bool MainFileIsAST);
 
-  void CleanTemporaryFiles();
   bool Parse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
              std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer);
 
@@ -530,11 +529,6 @@ class ASTUnit : public ModuleLoader {
   ASTMutationListener *getASTMutationListener();
   ASTDeserializationListener *getDeserializationListener();
 
-  /// \brief Add a temporary file that the ASTUnit depends on.
-  ///
-  /// This file will be erased when the ASTUnit is destroyed.
-  void addTemporaryFile(StringRef TempFile);
-
   bool getOnlyLocalDecls() const { return OnlyLocalDecls; }
 
   bool getOwnsRemappedFileBuffers() const { return OwnsRemappedFileBuffers; }
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h
index e910be14f969..ba2da92c5be1 100644
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -1467,11 +1467,9 @@ class Sema {
 
   VisibleModuleSet VisibleModules;
 
-  Module *CachedFakeTopLevelModule;
-
 public:
   /// \brief Get the module owning an entity.
-  Module *getOwningModule(Decl *Entity);
+  Module *getOwningModule(Decl *Entity) { return Entity->getOwningModule(); }
 
   /// \brief Make a merged definition of an existing hidden definition \p ND
   /// visible at the specified location.
diff --git a/lib/AST/ASTDumper.cpp b/lib/AST/ASTDumper.cpp
index ef491ab06f8c..d89be0d9e6fa 100644
--- a/lib/AST/ASTDumper.cpp
+++ b/lib/AST/ASTDumper.cpp
@@ -1038,10 +1038,10 @@ void ASTDumper::dumpDecl(const Decl *D) {
     dumpSourceRange(D->getSourceRange());
     OS << ' ';
     dumpLocation(D->getLocation());
-    if (Module *M = D->getImportedOwningModule())
+    if (D->isFromASTFile())
+      OS << " imported";
+    if (Module *M = D->getOwningModule())
       OS << " in " << M->getFullModuleName();
-    else if (Module *M = D->getLocalOwningModule())
-      OS << " in (local) " << M->getFullModuleName();
     if (auto *ND = dyn_cast<NamedDecl>(D))
       for (Module *M : D->getASTContext().getModulesWithMergedDefinition(
                const_cast<NamedDecl *>(ND)))
diff --git a/lib/AST/Decl.cpp b/lib/AST/Decl.cpp
index 0f2558e24ba5..a1342f477b68 100644
--- a/lib/AST/Decl.cpp
+++ b/lib/AST/Decl.cpp
@@ -47,9 +47,7 @@ bool Decl::isOutOfLine() const {
 
 TranslationUnitDecl::TranslationUnitDecl(ASTContext &ctx)
     : Decl(TranslationUnit, nullptr, SourceLocation()),
-      DeclContext(TranslationUnit), Ctx(ctx), AnonymousNamespace(nullptr) {
-  Hidden = Ctx.getLangOpts().ModulesLocalVisibility;
-}
+      DeclContext(TranslationUnit), Ctx(ctx), AnonymousNamespace(nullptr) {}
 
 //===----------------------------------------------------------------------===//
 // NamedDecl Implementation
diff --git a/lib/AST/DeclBase.cpp b/lib/AST/DeclBase.cpp
index 5c2c9cbd0180..f6f81692611d 100644
--- a/lib/AST/DeclBase.cpp
+++ b/lib/AST/DeclBase.cpp
@@ -75,7 +75,7 @@ void *Decl::operator new(std::size_t Size, const ASTContext &Ctx,
   assert(!Parent || &Parent->getParentASTContext() == &Ctx);
   // With local visibility enabled, we track the owning module even for local
   // declarations.
-  if (Ctx.getLangOpts().ModulesLocalVisibility) {
+  if (Ctx.getLangOpts().trackLocalOwningModule()) {
     // Ensure required alignment of the resulting object by adding extra
     // padding at the start if required.
     size_t ExtraAlign =
@@ -83,7 +83,9 @@ void *Decl::operator new(std::size_t Size, const ASTContext &Ctx,
     char *Buffer = reinterpret_cast<char *>(
         ::operator new(ExtraAlign + sizeof(Module *) + Size + Extra, Ctx));
     Buffer += ExtraAlign;
-    return new (Buffer) Module*(nullptr) + 1;
+    auto *ParentModule =
+        Parent ? cast<Decl>(Parent)->getOwningModule() : nullptr;
+    return new (Buffer) Module*(ParentModule) + 1;
   }
   return ::operator new(Size + Extra, Ctx);
 }
@@ -94,7 +96,7 @@ Module *Decl::getOwningModuleSlow() const {
 }
 
 bool Decl::hasLocalOwningModuleStorage() const {
-  return getASTContext().getLangOpts().ModulesLocalVisibility;
+  return getASTContext().getLangOpts().trackLocalOwningModule();
 }
 
 const char *Decl::getDeclKindName() const {
@@ -273,6 +275,8 @@ void Decl::setLexicalDeclContext(DeclContext *DC) {
     getMultipleDC()->LexicalDC = DC;
   }
   Hidden = cast<Decl>(DC)->Hidden;
+  if (Hidden && !isFromASTFile() && hasLocalOwningModuleStorage())
+    setLocalOwningModule(cast<Decl>(DC)->getOwningModule());
 }
 
 void Decl::setDeclContextsImpl(DeclContext *SemaDC, DeclContext *LexicalDC,
diff --git a/lib/AST/ODRHash.cpp b/lib/AST/ODRHash.cpp
index f4d314a6dd0d..24371db64d07 100644
--- a/lib/AST/ODRHash.cpp
+++ b/lib/AST/ODRHash.cpp
@@ -81,7 +81,35 @@ void ODRHash::AddDeclarationName(DeclarationName Name) {
   }
 }
 
-void ODRHash::AddNestedNameSpecifier(const NestedNameSpecifier *NNS) {}
+void ODRHash::AddNestedNameSpecifier(const NestedNameSpecifier *NNS) {
+  assert(NNS && "Expecting non-null pointer.");
+  // Hash the prefix chain first, preceded by a flag for its presence.
+  const NestedNameSpecifier *Outer = NNS->getPrefix();
+  AddBoolean(Outer);
+  if (Outer)
+    AddNestedNameSpecifier(Outer);
+  const auto NNSKind = NNS->getKind();
+  ID.AddInteger(NNSKind);
+  switch (NNSKind) {
+  case NestedNameSpecifier::Global:
+  case NestedNameSpecifier::Super:
+    break;
+  case NestedNameSpecifier::Identifier:
+    AddIdentifierInfo(NNS->getAsIdentifier());
+    break;
+  case NestedNameSpecifier::Namespace:
+    AddDecl(NNS->getAsNamespace());
+    break;
+  case NestedNameSpecifier::NamespaceAlias:
+    AddDecl(NNS->getAsNamespaceAlias());
+    break;
+  case NestedNameSpecifier::TypeSpec:
+  case NestedNameSpecifier::TypeSpecWithTemplate:
+    AddType(NNS->getAsType());
+    break;
+  }
+}
+
 void ODRHash::AddTemplateName(TemplateName Name) {}
 void ODRHash::AddTemplateArgument(TemplateArgument TA) {}
 void ODRHash::AddTemplateParameterList(const TemplateParameterList *TPL) {}
@@ -335,6 +363,20 @@ class ODRTypeVisitor : public TypeVisitor<ODRTypeVisitor> {
     Hash.AddQualType(T);
   }
 
+  void AddNestedNameSpecifier(const NestedNameSpecifier *NNS) {
+    // Emit a presence flag, then the specifier itself when there is one.
+    Hash.AddBoolean(NNS != nullptr);
+    if (NNS != nullptr)
+      Hash.AddNestedNameSpecifier(NNS);
+  }
+
+  void AddIdentifierInfo(const IdentifierInfo *II) {
+    // Emit a presence flag, then the identifier itself when there is one.
+    Hash.AddBoolean(II != nullptr);
+    if (II != nullptr)
+      Hash.AddIdentifierInfo(II);
+  }
+
   void VisitQualifiers(Qualifiers Quals) {
     ID.AddInteger(Quals.getAsOpaqueValue());
   }
@@ -414,6 +456,42 @@ class ODRTypeVisitor : public TypeVisitor<ODRTypeVisitor> {
     AddQualType(T->getDecl()->getUnderlyingType().getCanonicalType());
     VisitType(T);
   }
+
+  void VisitTagType(const TagType *T) {
+    AddDecl(T->getDecl()); // Hash the declaration that names this tag type.
+    VisitType(T);          // Then run the shared VisitType step.
+  }
+
+  void VisitRecordType(const RecordType *T) { VisitTagType(T); } // Tag path.
+  void VisitEnumType(const EnumType *T) { VisitTagType(T); } // Tag path.
+
+  void VisitTypeWithKeyword(const TypeWithKeyword *T) {
+    ID.AddInteger(T->getKeyword()); // Hash the keyword before the base data.
+    VisitType(T);
+  }
+
+  void VisitDependentNameType(const DependentNameType *T) {
+    AddNestedNameSpecifier(T->getQualifier()); // Qualifier goes in first.
+    AddIdentifierInfo(T->getIdentifier());     // Then the named identifier.
+    VisitTypeWithKeyword(T);                   // Keyword and VisitType last.
+  }
+
+  // Hashes identifier, qualifier, argument count and each argument in order.
+  void VisitDependentTemplateSpecializationType(
+      const DependentTemplateSpecializationType *T) {
+    AddIdentifierInfo(T->getIdentifier());
+    AddNestedNameSpecifier(T->getQualifier());
+    ID.AddInteger(T->getNumArgs());
+    for (const TemplateArgument &Arg : T->template_arguments())
+      Hash.AddTemplateArgument(Arg);
+    VisitTypeWithKeyword(T);
+  }
+
+  void VisitElaboratedType(const ElaboratedType *T) {
+    AddNestedNameSpecifier(T->getQualifier()); // Written qualifier first.
+    AddQualType(T->getNamedType());            // Then the type it names.
+    VisitTypeWithKeyword(T);                   // Keyword and VisitType last.
+  }
 };
 
 void ODRHash::AddType(const Type *T) {
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index 9d77c61bd52c..bf178dd7fd80 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -2860,7 +2860,7 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
 
   if (DebugKind >= codegenoptions::LimitedDebugInfo) {
     if (const NamespaceDecl *NSDecl =
-        dyn_cast_or_null<NamespaceDecl>(FD->getLexicalDeclContext()))
+        dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext()))
       FDContext = getOrCreateNamespace(NSDecl);
     else if (const RecordDecl *RDecl =
              dyn_cast_or_null<RecordDecl>(FD->getDeclContext())) {
diff --git a/lib/Driver/ToolChains/MSVC.cpp b/lib/Driver/ToolChains/MSVC.cpp
index a09304814ca6..6f5f54165b3b 100644
--- a/lib/Driver/ToolChains/MSVC.cpp
+++ b/lib/Driver/ToolChains/MSVC.cpp
@@ -125,8 +125,15 @@ static bool findVCToolChainViaEnvironment(std::string &Path,
         continue;
 
       // whatever/VC/bin --> old toolchain, VC dir is toolchain dir.
-      if (llvm::sys::path::filename(PathEntry) == "bin") {
-        llvm::StringRef ParentPath = llvm::sys::path::parent_path(PathEntry);
+      llvm::StringRef TestPath = PathEntry;
+      bool IsBin = llvm::sys::path::filename(TestPath).equals_lower("bin");
+      if (!IsBin) {
+        // Strip any architecture subdir like "amd64".
+        TestPath = llvm::sys::path::parent_path(TestPath);
+        IsBin = llvm::sys::path::filename(TestPath).equals_lower("bin");
+      }
+      if (IsBin) {
+        llvm::StringRef ParentPath = llvm::sys::path::parent_path(TestPath);
         if (llvm::sys::path::filename(ParentPath) == "VC") {
           Path = ParentPath;
           IsVS2017OrNewer = false;
diff --git a/lib/Frontend/ASTUnit.cpp b/lib/Frontend/ASTUnit.cpp
index 32ee9d3e9961..6ee211c2de67 100644
--- a/lib/Frontend/ASTUnit.cpp
+++ b/lib/Frontend/ASTUnit.cpp
@@ -84,13 +84,6 @@ namespace {
     /// \brief The file in which the precompiled preamble is stored.
     std::string PreambleFile;
 
-    /// \brief Temporary files that should be removed when the ASTUnit is
-    /// destroyed.
-    SmallVector<std::string, 4> TemporaryFiles;
-
-    /// \brief Erase temporary files.
-    void CleanTemporaryFiles();
-
     /// \brief Erase the preamble file.
     void CleanPreambleFile();
 
@@ -163,12 +156,6 @@ static const std::string &getPreambleFile(const ASTUnit *AU) {
   return getOnDiskData(AU).PreambleFile;  
 }
 
-void OnDiskData::CleanTemporaryFiles() {
-  for (StringRef File : TemporaryFiles)
-    llvm::sys::fs::remove(File);
-  TemporaryFiles.clear();
-}
-
 void OnDiskData::CleanPreambleFile() {
   if (!PreambleFile.empty()) {
     llvm::sys::fs::remove(PreambleFile);
@@ -177,7 +164,6 @@ void OnDiskData::CleanPreambleFile() {
 }
 
 void OnDiskData::Cleanup() {
-  CleanTemporaryFiles();
   CleanPreambleFile();
 }
 
@@ -194,14 +180,6 @@ void ASTUnit::clearFileLevelDecls() {
   llvm::DeleteContainerSeconds(FileDecls);
 }
 
-void ASTUnit::CleanTemporaryFiles() {
-  getOnDiskData(this).CleanTemporaryFiles();
-}
-
-void ASTUnit::addTemporaryFile(StringRef TempFile) {
-  getOnDiskData(this).TemporaryFiles.push_back(TempFile);
-}
-
 /// \brief After failing to build a precompiled preamble (due to
 /// errors in the source that occurs in the preamble), the number of
 /// reparses during which we'll skip even trying to precompile the
@@ -1100,7 +1078,6 @@ bool ASTUnit::Parse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
   // Clear out old caches and data.
   TopLevelDecls.clear();
   clearFileLevelDecls();
-  CleanTemporaryFiles();
 
   if (!OverrideMainBuffer) {
     checkAndRemoveNonDriverDiags(StoredDiagnostics);
diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h
index 9773acb840a5..5a1c572ce614 100644
--- a/lib/Headers/xmmintrin.h
+++ b/lib/Headers/xmmintrin.h
@@ -2133,7 +2133,7 @@ void _mm_sfence(void);
 /// \headerfile <x86intrin.h>
 ///
 /// \code
-/// void _mm_extract_pi(__m64 a, int n);
+/// int _mm_extract_pi16(__m64 a, int n);
 /// \endcode
 ///
 /// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.
@@ -2157,7 +2157,7 @@ void _mm_sfence(void);
 /// \headerfile <x86intrin.h>
 ///
 /// \code
-/// void _mm_insert_pi(__m64 a, int d, int n);
+/// __m64 _mm_insert_pi16(__m64 a, int d, int n);
 /// \endcode
 ///
 /// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
@@ -2680,8 +2680,7 @@ _mm_movelh_ps(__m128 __a, __m128 __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPI2PS + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    A 64-bit vector of [4 x i16]. The elements of the destination are copied
@@ -2711,8 +2710,7 @@ _mm_cvtpi16_ps(__m64 __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPI2PS + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    A 64-bit vector of 16-bit unsigned integer values. The elements of the
@@ -2741,8 +2739,7 @@ _mm_cvtpu16_ps(__m64 __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPI2PS + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    A 64-bit vector of [8 x i8]. The elements of the destination are copied
@@ -2766,8 +2763,7 @@ _mm_cvtpi8_ps(__m64 __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPI2PS + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    A 64-bit vector of unsigned 8-bit integer values. The elements of the
@@ -2791,8 +2787,7 @@ _mm_cvtpu8_ps(__m64 __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPI2PS + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    A 64-bit vector of [2 x i32]. The lower elements of the destination are
@@ -2826,8 +2821,7 @@ _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPS2PI + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    A 128-bit floating-point vector of [4 x float].
@@ -2857,8 +2851,7 @@ _mm_cvtps_pi16(__m128 __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> CVTPS2PI + \c COMPOSITE </c>
-///   instruction.
+/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.
 ///
 /// \param __a
 ///    128-bit floating-point vector of [4 x float].
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 3d6fe91115a9..92942fd09a0c 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -43,6 +43,8 @@ using namespace clang;
 
 /// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
 bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
+  if (isAnnotation())
+    return false;
   if (IdentifierInfo *II = getIdentifierInfo())
     return II->getObjCKeywordID() == objcKey;
   return false;
@@ -50,6 +52,8 @@ bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
 
 /// getObjCKeywordID - Return the ObjC keyword kind.
 tok::ObjCKeywordKind Token::getObjCKeywordID() const {
+  if (isAnnotation())
+    return tok::objc_not_keyword;
   IdentifierInfo *specId = getIdentifierInfo();
   return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
 }
diff --git a/lib/Sema/Sema.cpp b/lib/Sema/Sema.cpp
index ca1d27e9505f..e7b0914641ff 100644
--- a/lib/Sema/Sema.cpp
+++ b/lib/Sema/Sema.cpp
@@ -93,11 +93,10 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
       ValueWithBytesObjCTypeMethod(nullptr), NSArrayDecl(nullptr),
       ArrayWithObjectsMethod(nullptr), NSDictionaryDecl(nullptr),
       DictionaryWithObjectsMethod(nullptr), GlobalNewDeleteDeclared(false),
-      TUKind(TUKind), NumSFINAEErrors(0), CachedFakeTopLevelModule(nullptr),
-      AccessCheckingSFINAE(false), InNonInstantiationSFINAEContext(false),
-      NonInstantiationEntries(0), ArgumentPackSubstitutionIndex(-1),
-      CurrentInstantiationScope(nullptr), DisableTypoCorrection(false),
-      TyposCorrected(0), AnalysisWarnings(*this),
+      TUKind(TUKind), NumSFINAEErrors(0), AccessCheckingSFINAE(false),
+      InNonInstantiationSFINAEContext(false), NonInstantiationEntries(0),
+      ArgumentPackSubstitutionIndex(-1), CurrentInstantiationScope(nullptr),
+      DisableTypoCorrection(false), TyposCorrected(0), AnalysisWarnings(*this),
       ThreadSafetyDeclCache(nullptr), VarDataSharingAttributesStack(nullptr),
       CurScope(nullptr), Ident_super(nullptr), Ident___float128(nullptr) {
   TUScope = nullptr;
diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp
index 2e069a9defaa..dca51b0e8c8e 100644
--- a/lib/Sema/SemaDecl.cpp
+++ b/lib/Sema/SemaDecl.cpp
@@ -16047,6 +16047,14 @@ void Sema::ActOnModuleBegin(SourceLocation DirectiveLoc, Module *Mod) {
     ModuleScopes.back().OuterVisibleModules = std::move(VisibleModules);
 
   VisibleModules.setVisible(Mod, DirectiveLoc);
+
+  // The enclosing context is now part of this module.
+  // FIXME: Consider creating a child DeclContext to hold the entities
+  // lexically within the module.
+  if (getLangOpts().trackLocalOwningModule()) {
+    cast<Decl>(CurContext)->setHidden(true);
+    cast<Decl>(CurContext)->setLocalOwningModule(Mod);
+  }
 }
 
 void Sema::ActOnModuleEnd(SourceLocation EomLoc, Module *Mod) {
@@ -16075,6 +16083,13 @@ void Sema::ActOnModuleEnd(SourceLocation EomLoc, Module *Mod) {
     DirectiveLoc = EomLoc;
   }
   BuildModuleInclude(DirectiveLoc, Mod);
+
+  // Any further declarations are in whatever module we returned to.
+  if (getLangOpts().trackLocalOwningModule()) {
+    cast<Decl>(CurContext)->setLocalOwningModule(getCurrentModule());
+    if (!getCurrentModule())
+      cast<Decl>(CurContext)->setHidden(false);
+  }
 }
 
 void Sema::createImplicitModuleImportForErrorRecovery(SourceLocation Loc,
diff --git a/lib/Sema/SemaLookup.cpp b/lib/Sema/SemaLookup.cpp
index c5b579a4b2e9..04350831c681 100644
--- a/lib/Sema/SemaLookup.cpp
+++ b/lib/Sema/SemaLookup.cpp
@@ -1326,62 +1326,6 @@ bool Sema::CppLookupName(LookupResult &R, Scope *S) {
   return !R.empty();
 }
 
-Module *Sema::getOwningModule(Decl *Entity) {
-  // If it's imported, grab its owning module.
-  Module *M = Entity->getImportedOwningModule();
-  if (M || !isa<NamedDecl>(Entity) || !cast<NamedDecl>(Entity)->isHidden())
-    return M;
-  assert(!Entity->isFromASTFile() &&
-         "hidden entity from AST file has no owning module");
-
-  if (!getLangOpts().ModulesLocalVisibility) {
-    // If we're not tracking visibility locally, the only way a declaration
-    // can be hidden and local is if it's hidden because it's parent is (for
-    // instance, maybe this is a lazily-declared special member of an imported
-    // class).
-    auto *Parent = cast<NamedDecl>(Entity->getDeclContext());
-    assert(Parent->isHidden() && "unexpectedly hidden decl");
-    return getOwningModule(Parent);
-  }
-
-  // It's local and hidden; grab or compute its owning module.
-  M = Entity->getLocalOwningModule();
-  if (M)
-    return M;
-
-  if (auto *Containing =
-          PP.getModuleContainingLocation(Entity->getLocation())) {
-    M = Containing;
-  } else if (Entity->isInvalidDecl() || Entity->getLocation().isInvalid()) {
-    // Don't bother tracking visibility for invalid declarations with broken
-    // locations.
-    cast<NamedDecl>(Entity)->setHidden(false);
-  } else {
-    // We need to assign a module to an entity that exists outside of any
-    // module, so that we can hide it from modules that we textually enter.
-    // Invent a fake module for all such entities.
-    if (!CachedFakeTopLevelModule) {
-      CachedFakeTopLevelModule =
-          PP.getHeaderSearchInfo().getModuleMap().findOrCreateModule(
-              "<top-level>", nullptr, false, false).first;
-
-      auto &SrcMgr = PP.getSourceManager();
-      SourceLocation StartLoc =
-          SrcMgr.getLocForStartOfFile(SrcMgr.getMainFileID());
-      auto &TopLevel = ModuleScopes.empty()
-                           ? VisibleModules
-                           : ModuleScopes[0].OuterVisibleModules;
-      TopLevel.setVisible(CachedFakeTopLevelModule, StartLoc);
-    }
-
-    M = CachedFakeTopLevelModule;
-  }
-
-  if (M)
-    Entity->setLocalOwningModule(M);
-  return M;
-}
-
 void Sema::makeMergedDefinitionVisible(NamedDecl *ND) {
   if (auto *M = getCurrentModule())
     Context.mergeDefinitionIntoModule(ND, M);
@@ -1520,7 +1464,6 @@ bool LookupResult::isVisibleSlow(Sema &SemaRef, NamedDecl *D) {
   if (SemaRef.getLangOpts().ModulesLocalVisibility) {
     DeclModule = SemaRef.getOwningModule(D);
     if (!DeclModule) {
-      // getOwningModule() may have decided the declaration should not be hidden.
       assert(!D->isHidden() && "hidden decl not from a module");
       return true;
     }
diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp
index ef8481488302..5cabd0e6740d 100644
--- a/lib/Serialization/ASTReader.cpp
+++ b/lib/Serialization/ASTReader.cpp
@@ -9348,12 +9348,6 @@ void ASTReader::diagnoseOdrViolations() {
         return Hash.CalculateHash();
       };
 
-      auto ComputeDeclNameODRHash = [&Hash](const DeclarationName Name) {
-        Hash.clear();
-        Hash.AddDeclarationName(Name);
-        return Hash.CalculateHash();
-      };
-
       auto ComputeQualTypeODRHash = [&Hash](QualType Ty) {
         Hash.clear();
         Hash.AddQualType(Ty);
@@ -9446,11 +9440,8 @@ void ASTReader::diagnoseOdrViolations() {
 
         QualType FirstType = FirstField->getType();
         QualType SecondType = SecondField->getType();
-        const TypedefType *FirstTypedef = dyn_cast<TypedefType>(FirstType);
-        const TypedefType *SecondTypedef = dyn_cast<TypedefType>(SecondType);
-
-        if ((FirstTypedef && !SecondTypedef) ||
-            (!FirstTypedef && SecondTypedef)) {
+        if (ComputeQualTypeODRHash(FirstType) !=
+            ComputeQualTypeODRHash(SecondType)) {
           ODRDiagError(FirstField->getLocation(), FirstField->getSourceRange(),
                        FieldTypeName)
               << FirstII << FirstType;
@@ -9462,24 +9453,6 @@ void ASTReader::diagnoseOdrViolations() {
           break;
         }
 
-        if (FirstTypedef && SecondTypedef) {
-          unsigned FirstHash = ComputeDeclNameODRHash(
-              FirstTypedef->getDecl()->getDeclName());
-          unsigned SecondHash = ComputeDeclNameODRHash(
-              SecondTypedef->getDecl()->getDeclName());
-          if (FirstHash != SecondHash) {
-            ODRDiagError(FirstField->getLocation(),
-                         FirstField->getSourceRange(), FieldTypeName)
-                << FirstII << FirstType;
-            ODRDiagNote(SecondField->getLocation(),
-                        SecondField->getSourceRange(), FieldTypeName)
-                << SecondII << SecondType;
-
-            Diagnosed = true;
-            break;
-          }
-        }
-
         const bool IsFirstBitField = FirstField->isBitField();
         const bool IsSecondBitField = SecondField->isBitField();
         if (IsFirstBitField != IsSecondBitField) {
diff --git a/test/CodeGenCXX/debug-info-namespace.cpp b/test/CodeGenCXX/debug-info-namespace.cpp
index 5b81197671e2..95857e339085 100644
--- a/test/CodeGenCXX/debug-info-namespace.cpp
+++ b/test/CodeGenCXX/debug-info-namespace.cpp
@@ -60,6 +60,10 @@ void B::func_fwd() {
   anonymous = 0;
 }
 
+namespace C {
+  void c();
+}
+void C::c() {}
 
 // This should work even if 'i' and 'func' were declarations & not definitions,
 // but it doesn't yet.
@@ -114,6 +118,8 @@ void B::func_fwd() {
 // CHECK: [[M16]] = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: [[FUNC]], entity: [[FUNC_FWD:![0-9]+]]
 // CHECK: [[FUNC_FWD]] = distinct !DISubprogram(name: "func_fwd",{{.*}} line: 53,{{.*}} isDefinition: true
 // CHECK: [[M17]] = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: [[CTXT]], entity: [[I]]
+// CHECK: distinct !DISubprogram(name: "c",{{.*}}, scope: ![[C:[0-9]+]],{{.*}}, line: 60,{{.*}} isDefinition: true
+// CHECK: ![[C]] = !DINamespace(name: "C",
 
 // CHECK-GMLT: [[CU:![0-9]+]] = distinct !DICompileUnit(
 // CHECK-GMLT-SAME:                            emissionKind: LineTablesOnly,
diff --git a/test/Modules/Inputs/module.map b/test/Modules/Inputs/module.map
index 7416d7008b85..c0fe6c557f22 100644
--- a/test/Modules/Inputs/module.map
+++ b/test/Modules/Inputs/module.map
@@ -441,3 +441,7 @@ module DebugNestedB {
   header "DebugNestedB.h"
   export *
 }
+
+module objcAtKeywordMissingEnd {
+  header "objcAtKeywordMissingEnd.h"
+}
diff --git a/test/Modules/Inputs/objcAtKeywordMissingEnd.h b/test/Modules/Inputs/objcAtKeywordMissingEnd.h
new file mode 100644
index 000000000000..1196b87eef8e
--- /dev/null
+++ b/test/Modules/Inputs/objcAtKeywordMissingEnd.h
@@ -0,0 +1,3 @@
+@interface MissingEnd // expected-note {{class started here}}
+
+@ // expected-error {{expected an Objective-C directive after '@'}} expected-error {{missing '@end'}}
diff --git a/test/Modules/Inputs/submodule-visibility/b.h b/test/Modules/Inputs/submodule-visibility/b.h
index 67ef6529dbd8..39df6a02cb40 100644
--- a/test/Modules/Inputs/submodule-visibility/b.h
+++ b/test/Modules/Inputs/submodule-visibility/b.h
@@ -4,7 +4,11 @@ int m = n;
 #include "c.h"
 
 #if defined(A) && !defined(ALLOW_NAME_LEAKAGE)
-#error A is defined
+#warning A is defined
 #endif
 
 #define B
+
+template<typename T> void b_template() {
+  N::C::f(0);
+}
diff --git a/test/Modules/Inputs/submodule-visibility/other.h b/test/Modules/Inputs/submodule-visibility/other.h
index f40c757ca62e..4b68c489153c 100644
--- a/test/Modules/Inputs/submodule-visibility/other.h
+++ b/test/Modules/Inputs/submodule-visibility/other.h
@@ -1 +1,10 @@
 #include "c.h"
+
+#ifndef OTHER_H
+#define OTHER_H
+namespace N {
+  struct C {
+    template<typename U> static void f(U) {}
+  };
+}
+#endif
diff --git a/test/Modules/objc-at-keyword.m b/test/Modules/objc-at-keyword.m
new file mode 100644
index 000000000000..0e058a309017
--- /dev/null
+++ b/test/Modules/objc-at-keyword.m
@@ -0,0 +1,7 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -verify -x objective-c -fmodule-name=objcAtKeywordMissingEnd -emit-module %S/Inputs/module.map
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x objective-c -fmodule-name=Empty -emit-module %S/Inputs/module.map
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -verify -I %S/Inputs %s
+
+@interface X // expected-note {{class started here}}
+#pragma clang module import Empty // expected-error {{missing '@end'}}
diff --git a/test/Modules/odr_hash.cpp b/test/Modules/odr_hash.cpp
index 58814dd6b3fb..947583bcfd21 100644
--- a/test/Modules/odr_hash.cpp
+++ b/test/Modules/odr_hash.cpp
@@ -634,6 +634,237 @@ S3 s3;
 #endif
 }  // namespace Using
 
+namespace RecordType {
+#if defined(FIRST)
+struct B1 {};
+struct S1 {
+  B1 x;
+};
+#elif defined(SECOND)
+struct A1 {};
+struct S1 {
+  A1 x;
+};
+#else
+S1 s1;
+// expected-error@first.h:* {{'RecordType::S1::x' from module 'FirstModule' is not present in definition of 'RecordType::S1' in module 'SecondModule'}}
+// expected-note@second.h:* {{declaration of 'x' does not match}}
+#endif
+}
+
+namespace DependentType {
+#if defined(FIRST)
+template <class T>
+class S1 {
+  typename T::typeA x;
+};
+#elif defined(SECOND)
+template <class T>
+class S1 {
+  typename T::typeB x;
+};
+#else
+template<class T>
+using U1 = S1<T>;
+// expected-error@first.h:* {{'DependentType::S1::x' from module 'FirstModule' is not present in definition of 'S1<T>' in module 'SecondModule'}}
+// expected-note@second.h:* {{declaration of 'x' does not match}}
+#endif
+}
+
+namespace ElaboratedType {
+#if defined(FIRST)
+namespace N1 { using type = double; }
+struct S1 {
+  N1::type x;
+};
+#elif defined(SECOND)
+namespace N1 { using type = int; }
+struct S1 {
+  N1::type x;
+};
+#else
+S1 s1;
+// expected-error@first.h:* {{'ElaboratedType::S1::x' from module 'FirstModule' is not present in definition of 'ElaboratedType::S1' in module 'SecondModule'}}
+// expected-note@second.h:* {{declaration of 'x' does not match}}
+#endif
+}
+
+namespace Enum {
+#if defined(FIRST)
+enum A1 {};
+struct S1 {
+  A1 x;
+};
+#elif defined(SECOND)
+enum A2 {};
+struct S1 {
+  A2 x;
+};
+#else
+S1 s1;
+// expected-error@first.h:* {{'Enum::S1::x' from module 'FirstModule' is not present in definition of 'Enum::S1' in module 'SecondModule'}}
+// expected-note@second.h:* {{declaration of 'x' does not match}}
+#endif
+}
+
+namespace NestedNamespaceSpecifier {
+#if defined(FIRST)
+namespace LevelA1 {
+using Type = int;
+}
+
+struct S1 {
+  LevelA1::Type x;
+};
+# elif defined(SECOND)
+namespace LevelB1 {
+namespace LevelC1 {
+using Type = int;
+}
+}
+
+struct S1 {
+  LevelB1::LevelC1::Type x;
+};
+#else
+S1 s1;
+// expected-error@second.h:* {{'NestedNamespaceSpecifier::S1' has different definitions in different modules; first difference is definition in module 'SecondModule' found field 'x' with type 'LevelB1::LevelC1::Type' (aka 'int')}}
+// expected-note@first.h:* {{but in 'FirstModule' found field 'x' with type 'LevelA1::Type' (aka 'int')}}
+#endif
+
+#if defined(FIRST)
+namespace LevelA2 { using Type = int; }
+struct S2 {
+  LevelA2::Type x;
+};
+# elif defined(SECOND)
+struct S2 {
+  int x;
+};
+#else
+S2 s2;
+// expected-error@second.h:* {{'NestedNamespaceSpecifier::S2' has different definitions in different modules; first difference is definition in module 'SecondModule' found field 'x' with type 'int'}}
+// expected-note@first.h:* {{but in 'FirstModule' found field 'x' with type 'LevelA2::Type' (aka 'int')}}
+#endif
+
+namespace LevelA3 { using Type = int; }
+namespace LevelB3 { using Type = int; }
+#if defined(FIRST)
+struct S3 {
+  LevelA3::Type x;
+};
+# elif defined(SECOND)
+struct S3 {
+  LevelB3::Type x;
+};
+#else
+S3 s3;
+// expected-error@second.h:* {{'NestedNamespaceSpecifier::S3' has different definitions in different modules; first difference is definition in module 'SecondModule' found field 'x' with type 'LevelB3::Type' (aka 'int')}}
+// expected-note@first.h:* {{but in 'FirstModule' found field 'x' with type 'LevelA3::Type' (aka 'int')}}
+#endif
+
+#if defined(FIRST)
+struct TA4 { using Type = int; };
+struct S4 {
+  TA4::Type x;
+};
+# elif defined(SECOND)
+struct TB4 { using Type = int; };
+struct S4 {
+  TB4::Type x;
+};
+#else
+S4 s4;
+// expected-error@second.h:* {{'NestedNamespaceSpecifier::S4' has different definitions in different modules; first difference is definition in module 'SecondModule' found field 'x' with type 'TB4::Type' (aka 'int')}}
+// expected-note@first.h:* {{but in 'FirstModule' found field 'x' with type 'TA4::Type' (aka 'int')}}
+#endif
+
+#if defined(FIRST)
+struct T5 { using Type = int; };
+struct S5 {
+  T5::Type x;
+};
+# elif defined(SECOND)
+namespace T5 { using Type = int; };
+struct S5 {
+  T5::Type x;
+};
+#else
+S5 s5;
+// expected-error@second.h:* {{'NestedNamespaceSpecifier::S5' has different definitions in different modules; first difference is definition in module 'SecondModule' found field 'x' with type 'T5::Type' (aka 'int')}}
+// expected-note@first.h:* {{but in 'FirstModule' found field 'x' with type 'T5::Type' (aka 'int')}}
+#endif
+
+#if defined(FIRST)
+namespace N6 {using I = int;}
+struct S6 {
+  NestedNamespaceSpecifier::N6::I x;
+};
+# elif defined(SECOND)
+using I = int;
+struct S6 {
+  ::NestedNamespaceSpecifier::I x;
+};
+#else
+S6 s6;
+// expected-error@second.h:* {{'NestedNamespaceSpecifier::S6' has different definitions in different modules; first difference is definition in module 'SecondModule' found field 'x' with type '::NestedNamespaceSpecifier::I' (aka 'int')}}
+// expected-note@first.h:* {{but in 'FirstModule' found field 'x' with type 'NestedNamespaceSpecifier::N6::I' (aka 'int')}}
+#endif
+
+#if defined(FIRST)
+template <class T, class U>
+class S7 {
+  typename T::type *x = {};
+  int z = x->T::foo();
+};
+#elif defined(SECOND)
+template <class T, class U>
+class S7 {
+  typename T::type *x = {};
+  int z = x->U::foo();
+};
+#else
+template <class T, class U>
+using U7 = S7<T, U>;
+// expected-error@second.h:* {{'NestedNamespaceSpecifier::S7' has different definitions in different modules; first difference is definition in module 'SecondModule' found field 'z' with an initializer}}
+// expected-note@first.h:* {{but in 'FirstModule' found field 'z' with a different initializer}}
+#endif
+
+#if defined(FIRST)
+template <class T>
+class S8 {
+  int x = T::template X<int>::value;
+};
+#elif defined(SECOND)
+template <class T>
+class S8 {
+  int x = T::template Y<int>::value;
+};
+#else
+template <class T>
+using U8 = S8<T>;
+// expected-error@second.h:* {{'NestedNamespaceSpecifier::S8' has different definitions in different modules; first difference is definition in module 'SecondModule' found field 'x' with an initializer}}
+// expected-note@first.h:* {{but in 'FirstModule' found field 'x' with a different initializer}}
+#endif
+
+#if defined(FIRST)
+namespace N9 { using I = int; }
+namespace O9 = N9;
+struct S9 {
+  O9::I x;
+};
+#elif defined(SECOND)
+namespace N9 { using I = int; }
+namespace P9 = N9;
+struct S9 {
+  P9::I x;
+};
+#else
+S9 s9;
+// expected-error@second.h:* {{'NestedNamespaceSpecifier::S9' has different definitions in different modules; first difference is definition in module 'SecondModule' found field 'x' with type 'P9::I' (aka 'int')}}
+// expected-note@first.h:* {{but in 'FirstModule' found field 'x' with type 'O9::I' (aka 'int')}}
+#endif
+}
 
 // Interesting cases that should not cause errors.  struct S should not error
 // while struct T should error at the access specifier mismatch at the end.
diff --git a/test/Modules/submodule-visibility.cpp b/test/Modules/submodule-visibility.cpp
index 345ae155bb32..4c066e6ab9b0 100644
--- a/test/Modules/submodule-visibility.cpp
+++ b/test/Modules/submodule-visibility.cpp
@@ -3,6 +3,11 @@
 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-local-submodule-visibility -fmodules-cache-path=%t -I%S/Inputs/submodule-visibility -verify %s -DIMPORT
 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-local-submodule-visibility -fmodules-cache-path=%t -fmodule-name=x -I%S/Inputs/submodule-visibility -verify %s
 // RUN: %clang_cc1 -fimplicit-module-maps -fmodules-local-submodule-visibility -fmodules-cache-path=%t -I%S/Inputs/submodule-visibility -verify %s
+//
+// Explicit module builds.
+// RUN: %clang_cc1 -fmodules -fmodules-local-submodule-visibility -emit-module -x c++-module-map %S/Inputs/submodule-visibility/module.modulemap -fmodule-name=other -o %t/other.pcm
+// RUN: %clang_cc1 -fmodules -fmodule-map-file=%S/Inputs/submodule-visibility/module.modulemap -fmodules-local-submodule-visibility -fmodule-file=%t/other.pcm -verify -fmodule-name=x -I%S/Inputs/submodule-visibility %s
+// RUN: %clang_cc1 -fmodules -fmodule-map-file=%S/Inputs/submodule-visibility/module.modulemap -fmodule-file=%t/other.pcm -verify -fmodule-name=x -I%S/Inputs/submodule-visibility %s -DALLOW_TEXTUAL_NAME_LEAKAGE
 
 #include "a.h"
 #include "b.h"
@@ -11,6 +16,8 @@
 // expected-no-diagnostics
 #elif IMPORT
 // expected-error@-6 {{could not build module 'x'}}
+#elif ALLOW_TEXTUAL_NAME_LEAKAGE
+// expected-warning@b.h:7 {{A is defined}}
 #else
 // The use of -fmodule-name=x causes us to textually include the above headers.
 // The submodule visibility rules are still applied in this case.
@@ -35,3 +42,5 @@ typedef struct {
   int p;                 
   void (*f)(int p);                                                                       
 } name_for_linkage;
+
+void g() { b_template<int>(); }

From 834763c1a4849df24fd4f9b466d0c160cf7ed44b Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 17 May 2017 20:22:58 +0000
Subject: [PATCH 6/9] Vendor import of compiler-rt trunk r303291:
 https://llvm.org/svn/llvm-project/compiler-rt/trunk@303291

---
 lib/asan/tests/asan_test_utils.h |  2 +-
 lib/builtins/floatdidf.c         |  2 +-
 lib/ubsan/CMakeLists.txt         | 25 +++++++++++++++++++++++++
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/lib/asan/tests/asan_test_utils.h b/lib/asan/tests/asan_test_utils.h
index f16d939c94aa..c292467220d4 100644
--- a/lib/asan/tests/asan_test_utils.h
+++ b/lib/asan/tests/asan_test_utils.h
@@ -30,11 +30,11 @@
 #include <stdint.h>
 #include <assert.h>
 #include <algorithm>
+#include <setjmp.h>
 
 #if !defined(_WIN32)
 # include <strings.h>
 # include <sys/mman.h>
-# include <setjmp.h>
 #endif
 
 #ifdef __linux__
diff --git a/lib/builtins/floatdidf.c b/lib/builtins/floatdidf.c
index fccb29072407..681fecef9682 100644
--- a/lib/builtins/floatdidf.c
+++ b/lib/builtins/floatdidf.c
@@ -104,7 +104,7 @@ __floatdidf(di_int a)
 }
 #endif
 
-#if defined(__AEABI__)
+#if defined(__ARM_EABI__)
 AEABI_RTABI double __aeabi_l2d(di_int a) {
   return __floatdidf(a);
 }
diff --git a/lib/ubsan/CMakeLists.txt b/lib/ubsan/CMakeLists.txt
index 7e10456e3142..457a6b47525d 100644
--- a/lib/ubsan/CMakeLists.txt
+++ b/lib/ubsan/CMakeLists.txt
@@ -34,6 +34,10 @@ set(UBSAN_CXXFLAGS ${SANITIZER_COMMON_CFLAGS})
 append_rtti_flag(ON UBSAN_CXXFLAGS)
 append_list_if(SANITIZER_CAN_USE_CXXABI -DUBSAN_CAN_USE_CXXABI UBSAN_CXXFLAGS)
 
+append_list_if(COMPILER_RT_HAS_LIBDL dl UBSAN_DYNAMIC_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBRT rt UBSAN_DYNAMIC_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread UBSAN_DYNAMIC_LIBS)
+
 add_compiler_rt_component(ubsan)
 
 if(APPLE)
@@ -144,6 +148,27 @@ else()
       CFLAGS ${UBSAN_CXXFLAGS}
       PARENT_TARGET ubsan)
 
+    add_compiler_rt_runtime(clang_rt.ubsan_standalone
+      SHARED
+      ARCHS ${UBSAN_SUPPORTED_ARCH}
+      OBJECT_LIBS RTSanitizerCommon
+              RTSanitizerCommonLibc
+              RTUbsan
+      CFLAGS ${UBSAN_CFLAGS}
+      LINK_LIBS ${UBSAN_DYNAMIC_LIBS}
+      PARENT_TARGET ubsan)
+
+    add_compiler_rt_runtime(clang_rt.ubsan_standalone_cxx
+      SHARED
+      ARCHS ${UBSAN_SUPPORTED_ARCH}
+      OBJECT_LIBS RTSanitizerCommon
+              RTSanitizerCommonLibc
+              RTUbsan
+              RTUbsan_cxx
+      CFLAGS ${UBSAN_CXXFLAGS}
+      LINK_LIBS ${UBSAN_DYNAMIC_LIBS}
+      PARENT_TARGET ubsan)
+
     if (UNIX)
       set(ARCHS_FOR_SYMBOLS ${UBSAN_SUPPORTED_ARCH})
       list(REMOVE_ITEM ARCHS_FOR_SYMBOLS i386 i686)

From 318f9824bdefe5bdd518730a92469f8fd0e61ff6 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 17 May 2017 20:23:06 +0000
Subject: [PATCH 7/9] Vendor import of libc++ trunk r303291:
 https://llvm.org/svn/llvm-project/libcxx/trunk@303291

---
 include/iterator                              |  37 +++--
 include/optional                              |   4 +-
 .../iterator.operations/advance.pass.cpp      |  33 ++++-
 .../iterator.operations/distance.pass.cpp     |  21 +++
 .../iterator.operations/next.pass.cpp         |  34 +++++
 .../iterator.operations/prev.pass.cpp         |  31 +++++
 .../optional.object.ctor/copy.pass.cpp        |   7 +-
 .../optional.object.ctor/move.pass.cpp        |   7 +-
 test/support/test_iterators.h                 | 128 +++++++++---------
 9 files changed, 217 insertions(+), 85 deletions(-)

diff --git a/include/iterator b/include/iterator
index 47a7811a3004..4aa44746dc96 100644
--- a/include/iterator
+++ b/include/iterator
@@ -64,14 +64,23 @@ struct forward_iterator_tag       : public input_iterator_tag         {};
 struct bidirectional_iterator_tag : public forward_iterator_tag       {};
 struct random_access_iterator_tag : public bidirectional_iterator_tag {};
 
+// 27.4.3, iterator operations
 // extension: second argument not conforming to C++03
-template <class InputIterator>
-void advance(InputIterator& i,
+template <class InputIterator>  // constexpr in C++17
+  constexpr void advance(InputIterator& i,
              typename iterator_traits<InputIterator>::difference_type n);
 
-template <class InputIterator>
-typename iterator_traits<InputIterator>::difference_type
-distance(InputIterator first, InputIterator last);
+template <class InputIterator>  // constexpr in C++17
+  constexpr typename iterator_traits<InputIterator>::difference_type
+    distance(InputIterator first, InputIterator last);
+
+template <class InputIterator>  // constexpr in C++17
+  constexpr InputIterator next(InputIterator x,
+    typename iterator_traits<InputIterator>::difference_type n = 1);
+
+template <class BidirectionalIterator>  // constexpr in C++17
+  constexpr BidirectionalIterator prev(BidirectionalIterator x,
+    typename iterator_traits<BidirectionalIterator>::difference_type n = 1);
 
 template <class Iterator>
 class reverse_iterator
@@ -529,7 +538,7 @@ struct _LIBCPP_TEMPLATE_VIS iterator
 };
 
 template <class _InputIter>
-inline _LIBCPP_INLINE_VISIBILITY
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 void __advance(_InputIter& __i,
              typename iterator_traits<_InputIter>::difference_type __n, input_iterator_tag)
 {
@@ -538,7 +547,7 @@ void __advance(_InputIter& __i,
 }
 
 template <class _BiDirIter>
-inline _LIBCPP_INLINE_VISIBILITY
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 void __advance(_BiDirIter& __i,
              typename iterator_traits<_BiDirIter>::difference_type __n, bidirectional_iterator_tag)
 {
@@ -551,7 +560,7 @@ void __advance(_BiDirIter& __i,
 }
 
 template <class _RandIter>
-inline _LIBCPP_INLINE_VISIBILITY
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 void __advance(_RandIter& __i,
              typename iterator_traits<_RandIter>::difference_type __n, random_access_iterator_tag)
 {
@@ -559,7 +568,7 @@ void __advance(_RandIter& __i,
 }
 
 template <class _InputIter>
-inline _LIBCPP_INLINE_VISIBILITY
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 void advance(_InputIter& __i,
              typename iterator_traits<_InputIter>::difference_type __n)
 {
@@ -567,7 +576,7 @@ void advance(_InputIter& __i,
 }
 
 template <class _InputIter>
-inline _LIBCPP_INLINE_VISIBILITY
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 typename iterator_traits<_InputIter>::difference_type
 __distance(_InputIter __first, _InputIter __last, input_iterator_tag)
 {
@@ -578,7 +587,7 @@ __distance(_InputIter __first, _InputIter __last, input_iterator_tag)
 }
 
 template <class _RandIter>
-inline _LIBCPP_INLINE_VISIBILITY
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 typename iterator_traits<_RandIter>::difference_type
 __distance(_RandIter __first, _RandIter __last, random_access_iterator_tag)
 {
@@ -586,7 +595,7 @@ __distance(_RandIter __first, _RandIter __last, random_access_iterator_tag)
 }
 
 template <class _InputIter>
-inline _LIBCPP_INLINE_VISIBILITY
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 typename iterator_traits<_InputIter>::difference_type
 distance(_InputIter __first, _InputIter __last)
 {
@@ -594,7 +603,7 @@ distance(_InputIter __first, _InputIter __last)
 }
 
 template <class _InputIter>
-inline _LIBCPP_INLINE_VISIBILITY
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 _InputIter
 next(_InputIter __x,
      typename iterator_traits<_InputIter>::difference_type __n = 1,
@@ -605,7 +614,7 @@ next(_InputIter __x,
 }
 
 template <class _BidiretionalIter>
-inline _LIBCPP_INLINE_VISIBILITY
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 _BidiretionalIter
 prev(_BidiretionalIter __x,
      typename iterator_traits<_BidiretionalIter>::difference_type __n = 1,
diff --git a/include/optional b/include/optional
index 8c7a242113a0..70b6eb44dcd6 100644
--- a/include/optional
+++ b/include/optional
@@ -599,8 +599,8 @@ private:
 public:
 
     _LIBCPP_INLINE_VISIBILITY constexpr optional() noexcept {}
-    _LIBCPP_INLINE_VISIBILITY optional(const optional&) = default;
-    _LIBCPP_INLINE_VISIBILITY optional(optional&&) = default;
+    _LIBCPP_INLINE_VISIBILITY constexpr optional(const optional&) = default;
+    _LIBCPP_INLINE_VISIBILITY constexpr optional(optional&&) = default;
     _LIBCPP_INLINE_VISIBILITY constexpr optional(nullopt_t) noexcept {}
 
     template <class... _Args, class = enable_if_t<
diff --git a/test/std/iterators/iterator.primitives/iterator.operations/advance.pass.cpp b/test/std/iterators/iterator.primitives/iterator.operations/advance.pass.cpp
index e395da299dc7..e5bd5603b8e3 100644
--- a/test/std/iterators/iterator.primitives/iterator.operations/advance.pass.cpp
+++ b/test/std/iterators/iterator.primitives/iterator.operations/advance.pass.cpp
@@ -9,14 +9,16 @@
 
 // <iterator>
 
+//   All of these became constexpr in C++17
+//
 // template <InputIterator Iter>
-//   void advance(Iter& i, Iter::difference_type n);
+//   constexpr void advance(Iter& i, Iter::difference_type n);
 //
 // template <BidirectionalIterator Iter>
-//   void advance(Iter& i, Iter::difference_type n);
+//   constexpr void advance(Iter& i, Iter::difference_type n);
 //
 // template <RandomAccessIterator Iter>
-//   void advance(Iter& i, Iter::difference_type n);
+//   constexpr void advance(Iter& i, Iter::difference_type n);
 
 #include <iterator>
 #include <cassert>
@@ -31,8 +33,19 @@ test(It i, typename std::iterator_traits<It>::difference_type n, It x)
     assert(i == x);
 }
 
+#if TEST_STD_VER > 14
+template <class It>
+constexpr bool
+constexpr_test(It i, typename std::iterator_traits<It>::difference_type n, It x)
+{
+    std::advance(i, n);
+    return i == x;
+}
+#endif
+
 int main()
 {
+    {
     const char* s = "1234567890";
     test(input_iterator<const char*>(s), 10, input_iterator<const char*>(s+10));
     test(forward_iterator<const char*>(s), 10, forward_iterator<const char*>(s+10));
@@ -42,4 +55,18 @@ int main()
     test(random_access_iterator<const char*>(s+5), -5, random_access_iterator<const char*>(s));
     test(s+5, 5, s+10);
     test(s+5, -5, s);
+    }
+#if TEST_STD_VER > 14
+    {
+    constexpr const char* s = "1234567890";
+    static_assert( constexpr_test(input_iterator<const char*>(s), 10, input_iterator<const char*>(s+10)), "" );
+    static_assert( constexpr_test(forward_iterator<const char*>(s), 10, forward_iterator<const char*>(s+10)), "" );
+    static_assert( constexpr_test(bidirectional_iterator<const char*>(s+5), 5, bidirectional_iterator<const char*>(s+10)), "" );
+    static_assert( constexpr_test(bidirectional_iterator<const char*>(s+5), -5, bidirectional_iterator<const char*>(s)), "" );
+    static_assert( constexpr_test(random_access_iterator<const char*>(s+5), 5, random_access_iterator<const char*>(s+10)), "" );
+    static_assert( constexpr_test(random_access_iterator<const char*>(s+5), -5, random_access_iterator<const char*>(s)), "" );
+    static_assert( constexpr_test(s+5, 5, s+10), "" );
+    static_assert( constexpr_test(s+5, -5, s), "" );
+    }
+#endif
 }
diff --git a/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp b/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp
index 7fef635838c3..2f16fcb38236 100644
--- a/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp
+++ b/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp
@@ -29,12 +29,33 @@ test(It first, It last, typename std::iterator_traits<It>::difference_type x)
     assert(std::distance(first, last) == x);
 }
 
+#if TEST_STD_VER > 14
+template <class It>
+constexpr bool
+constexpr_test(It first, It last, typename std::iterator_traits<It>::difference_type x)
+{
+    return std::distance(first, last) == x;
+}
+#endif
+
 int main()
 {
+    {
     const char* s = "1234567890";
     test(input_iterator<const char*>(s), input_iterator<const char*>(s+10), 10);
     test(forward_iterator<const char*>(s), forward_iterator<const char*>(s+10), 10);
     test(bidirectional_iterator<const char*>(s), bidirectional_iterator<const char*>(s+10), 10);
     test(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s+10), 10);
     test(s, s+10, 10);
+    }
+#if TEST_STD_VER > 14
+    {
+    constexpr const char* s = "1234567890";
+    static_assert( constexpr_test(input_iterator<const char*>(s), input_iterator<const char*>(s+10), 10), "");
+    static_assert( constexpr_test(forward_iterator<const char*>(s), forward_iterator<const char*>(s+10), 10), "");
+    static_assert( constexpr_test(bidirectional_iterator<const char*>(s), bidirectional_iterator<const char*>(s+10), 10), "");
+    static_assert( constexpr_test(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s+10), 10), "");
+    static_assert( constexpr_test(s, s+10, 10), "");
+    }
+#endif
 }
diff --git a/test/std/iterators/iterator.primitives/iterator.operations/next.pass.cpp b/test/std/iterators/iterator.primitives/iterator.operations/next.pass.cpp
index 0952588b0cb1..e257b3eaaf31 100644
--- a/test/std/iterators/iterator.primitives/iterator.operations/next.pass.cpp
+++ b/test/std/iterators/iterator.primitives/iterator.operations/next.pass.cpp
@@ -33,8 +33,25 @@ test(It i, It x)
     assert(std::next(i) == x);
 }
 
+#if TEST_STD_VER > 14
+template <class It>
+constexpr bool
+constexpr_test(It i, typename std::iterator_traits<It>::difference_type n, It x)
+{
+    return std::next(i, n) == x;
+}
+
+template <class It>
+constexpr bool
+constexpr_test(It i, It x)
+{
+    return std::next(i) == x;
+}
+#endif
+
 int main()
 {
+    {
     const char* s = "1234567890";
     test(input_iterator<const char*>(s), 10, input_iterator<const char*>(s+10));
     test(forward_iterator<const char*>(s), 10, forward_iterator<const char*>(s+10));
@@ -47,4 +64,21 @@ int main()
     test(bidirectional_iterator<const char*>(s), bidirectional_iterator<const char*>(s+1));
     test(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s+1));
     test(s, s+1);
+    }
+#if TEST_STD_VER > 14
+    {
+    constexpr const char* s = "1234567890";
+    static_assert( constexpr_test(input_iterator<const char*>(s), 10, input_iterator<const char*>(s+10)), "" );
+    static_assert( constexpr_test(forward_iterator<const char*>(s), 10, forward_iterator<const char*>(s+10)), "" );
+    static_assert( constexpr_test(bidirectional_iterator<const char*>(s), 10, bidirectional_iterator<const char*>(s+10)), "" );
+    static_assert( constexpr_test(random_access_iterator<const char*>(s), 10, random_access_iterator<const char*>(s+10)), "" );
+    static_assert( constexpr_test(s, 10, s+10), "" );
+
+    static_assert( constexpr_test(input_iterator<const char*>(s), input_iterator<const char*>(s+1)), "" );
+    static_assert( constexpr_test(forward_iterator<const char*>(s), forward_iterator<const char*>(s+1)), "" );
+    static_assert( constexpr_test(bidirectional_iterator<const char*>(s), bidirectional_iterator<const char*>(s+1)), "" );
+    static_assert( constexpr_test(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s+1)), "" );
+    static_assert( constexpr_test(s, s+1), "" );
+    }
+#endif
 }
diff --git a/test/std/iterators/iterator.primitives/iterator.operations/prev.pass.cpp b/test/std/iterators/iterator.primitives/iterator.operations/prev.pass.cpp
index 0641706c928b..465cda1c47e4 100644
--- a/test/std/iterators/iterator.primitives/iterator.operations/prev.pass.cpp
+++ b/test/std/iterators/iterator.primitives/iterator.operations/prev.pass.cpp
@@ -31,8 +31,25 @@ test(It i, It x)
     assert(std::prev(i) == x);
 }
 
+#if TEST_STD_VER > 14
+template <class It>
+constexpr bool
+constexpr_test(It i, typename std::iterator_traits<It>::difference_type n, It x)
+{
+    return std::prev(i, n) == x;
+}
+
+template <class It>
+constexpr bool
+constexpr_test(It i, It x)
+{
+    return std::prev(i) == x;
+}
+#endif
+
 int main()
 {
+    {
     const char* s = "1234567890";
     test(bidirectional_iterator<const char*>(s+10), 10, bidirectional_iterator<const char*>(s));
     test(random_access_iterator<const char*>(s+10), 10, random_access_iterator<const char*>(s));
@@ -41,4 +58,18 @@ int main()
     test(bidirectional_iterator<const char*>(s+1), bidirectional_iterator<const char*>(s));
     test(random_access_iterator<const char*>(s+1), random_access_iterator<const char*>(s));
     test(s+1, s);
+    }
+#if TEST_STD_VER > 14
+    {
+    constexpr const char* s = "1234567890";
+    static_assert( constexpr_test(bidirectional_iterator<const char*>(s+10), 10, bidirectional_iterator<const char*>(s)), "" );
+    static_assert( constexpr_test(random_access_iterator<const char*>(s+10), 10, random_access_iterator<const char*>(s)), "" );
+    static_assert( constexpr_test(s+10, 10, s), "" );
+
+    static_assert( constexpr_test(bidirectional_iterator<const char*>(s+1), bidirectional_iterator<const char*>(s)), "" );
+    static_assert( constexpr_test(random_access_iterator<const char*>(s+1), random_access_iterator<const char*>(s)), "" );
+    static_assert( constexpr_test(s+1, s), "" );
+    }
+#endif
+
 }
diff --git a/test/std/utilities/optional/optional.object/optional.object.ctor/copy.pass.cpp b/test/std/utilities/optional/optional.object/optional.object.ctor/copy.pass.cpp
index 5906d4edd119..76c1fb82b866 100644
--- a/test/std/utilities/optional/optional.object/optional.object.ctor/copy.pass.cpp
+++ b/test/std/utilities/optional/optional.object/optional.object.ctor/copy.pass.cpp
@@ -10,7 +10,7 @@
 // UNSUPPORTED: c++98, c++03, c++11, c++14
 // <optional>
 
-// optional(const optional<T>& rhs);
+// constexpr optional(const optional<T>& rhs);
 
 #include <optional>
 #include <type_traits>
@@ -152,4 +152,9 @@ int main()
     {
         test_reference_extension();
     }
+    {
+    constexpr std::optional<int> o1{4};
+    constexpr std::optional<int> o2 = o1;
+    static_assert( *o2 == 4, "" );
+    }
 }
diff --git a/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp b/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp
index 9f23e9b4381b..09aaa0561b51 100644
--- a/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp
+++ b/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp
@@ -18,7 +18,7 @@
 
 // <optional>
 
-// optional(optional<T>&& rhs);
+// constexpr optional(optional<T>&& rhs);
 
 #include <optional>
 #include <type_traits>
@@ -206,4 +206,9 @@ int main()
     {
         test_reference_extension();
     }
+    {
+    constexpr std::optional<int> o1{4};
+    constexpr std::optional<int> o2 = std::move(o1);
+    static_assert( *o2 == 4, "" );
+    }
 }
diff --git a/test/support/test_iterators.h b/test/support/test_iterators.h
index a2c22b09de84..0fdb225b2c71 100644
--- a/test/support/test_iterators.h
+++ b/test/support/test_iterators.h
@@ -68,23 +68,23 @@ class input_iterator
     typedef It                                                 pointer;
     typedef typename Traits::reference                         reference;
 
-    It base() const {return it_;}
+    TEST_CONSTEXPR_CXX14 It base() const {return it_;}
 
-    input_iterator() : it_() {}
-    explicit input_iterator(It it) : it_(it) {}
+    TEST_CONSTEXPR_CXX14 input_iterator() : it_() {}
+    explicit TEST_CONSTEXPR_CXX14 input_iterator(It it) : it_(it) {}
     template <class U, class T>
-        input_iterator(const input_iterator<U, T>& u) :it_(u.it_) {}
+        TEST_CONSTEXPR_CXX14 input_iterator(const input_iterator<U, T>& u) :it_(u.it_) {}
 
-    reference operator*() const {return *it_;}
-    pointer operator->() const {return it_;}
+    TEST_CONSTEXPR_CXX14 reference operator*() const {return *it_;}
+    TEST_CONSTEXPR_CXX14 pointer operator->() const {return it_;}
 
-    input_iterator& operator++() {++it_; return *this;}
-    input_iterator operator++(int)
+    TEST_CONSTEXPR_CXX14 input_iterator& operator++() {++it_; return *this;}
+    TEST_CONSTEXPR_CXX14 input_iterator operator++(int)
         {input_iterator tmp(*this); ++(*this); return tmp;}
 
-    friend bool operator==(const input_iterator& x, const input_iterator& y)
+    friend TEST_CONSTEXPR_CXX14 bool operator==(const input_iterator& x, const input_iterator& y)
         {return x.it_ == y.it_;}
-    friend bool operator!=(const input_iterator& x, const input_iterator& y)
+    friend TEST_CONSTEXPR_CXX14 bool operator!=(const input_iterator& x, const input_iterator& y)
         {return !(x == y);}
 
     template <class T>
@@ -120,23 +120,23 @@ class forward_iterator
     typedef It                                                 pointer;
     typedef typename std::iterator_traits<It>::reference       reference;
 
-    It base() const {return it_;}
+    TEST_CONSTEXPR_CXX14 It base() const {return it_;}
 
-    forward_iterator() : it_() {}
-    explicit forward_iterator(It it) : it_(it) {}
+    TEST_CONSTEXPR_CXX14 forward_iterator() : it_() {}
+    explicit TEST_CONSTEXPR_CXX14 forward_iterator(It it) : it_(it) {}
     template <class U>
-        forward_iterator(const forward_iterator<U>& u) :it_(u.it_) {}
+        TEST_CONSTEXPR_CXX14 forward_iterator(const forward_iterator<U>& u) :it_(u.it_) {}
 
-    reference operator*() const {return *it_;}
-    pointer operator->() const {return it_;}
+    TEST_CONSTEXPR_CXX14 reference operator*() const {return *it_;}
+    TEST_CONSTEXPR_CXX14 pointer operator->() const {return it_;}
 
-    forward_iterator& operator++() {++it_; return *this;}
-    forward_iterator operator++(int)
+    TEST_CONSTEXPR_CXX14 forward_iterator& operator++() {++it_; return *this;}
+    TEST_CONSTEXPR_CXX14 forward_iterator operator++(int)
         {forward_iterator tmp(*this); ++(*this); return tmp;}
 
-    friend bool operator==(const forward_iterator& x, const forward_iterator& y)
+    friend TEST_CONSTEXPR_CXX14 bool operator==(const forward_iterator& x, const forward_iterator& y)
         {return x.it_ == y.it_;}
-    friend bool operator!=(const forward_iterator& x, const forward_iterator& y)
+    friend TEST_CONSTEXPR_CXX14 bool operator!=(const forward_iterator& x, const forward_iterator& y)
         {return !(x == y);}
 
     template <class T>
@@ -145,7 +145,7 @@ class forward_iterator
 
 template <class T, class U>
 inline
-bool
+bool TEST_CONSTEXPR_CXX14
 operator==(const forward_iterator<T>& x, const forward_iterator<U>& y)
 {
     return x.base() == y.base();
@@ -153,7 +153,7 @@ operator==(const forward_iterator<T>& x, const forward_iterator<U>& y)
 
 template <class T, class U>
 inline
-bool
+bool TEST_CONSTEXPR_CXX14
 operator!=(const forward_iterator<T>& x, const forward_iterator<U>& y)
 {
     return !(x == y);
@@ -172,22 +172,22 @@ class bidirectional_iterator
     typedef It                                                 pointer;
     typedef typename std::iterator_traits<It>::reference       reference;
 
-    It base() const {return it_;}
+    TEST_CONSTEXPR_CXX14 It base() const {return it_;}
 
-    bidirectional_iterator() : it_() {}
-    explicit bidirectional_iterator(It it) : it_(it) {}
+    TEST_CONSTEXPR_CXX14 bidirectional_iterator() : it_() {}
+    explicit TEST_CONSTEXPR_CXX14 bidirectional_iterator(It it) : it_(it) {}
     template <class U>
-        bidirectional_iterator(const bidirectional_iterator<U>& u) :it_(u.it_) {}
+        TEST_CONSTEXPR_CXX14 bidirectional_iterator(const bidirectional_iterator<U>& u) :it_(u.it_) {}
 
-    reference operator*() const {return *it_;}
-    pointer operator->() const {return it_;}
+    TEST_CONSTEXPR_CXX14 reference operator*() const {return *it_;}
+    TEST_CONSTEXPR_CXX14 pointer operator->() const {return it_;}
 
-    bidirectional_iterator& operator++() {++it_; return *this;}
-    bidirectional_iterator operator++(int)
+    TEST_CONSTEXPR_CXX14 bidirectional_iterator& operator++() {++it_; return *this;}
+    TEST_CONSTEXPR_CXX14 bidirectional_iterator operator++(int)
         {bidirectional_iterator tmp(*this); ++(*this); return tmp;}
 
-    bidirectional_iterator& operator--() {--it_; return *this;}
-    bidirectional_iterator operator--(int)
+    TEST_CONSTEXPR_CXX14 bidirectional_iterator& operator--() {--it_; return *this;}
+    TEST_CONSTEXPR_CXX14 bidirectional_iterator operator--(int)
         {bidirectional_iterator tmp(*this); --(*this); return tmp;}
 
     template <class T>
@@ -196,7 +196,7 @@ class bidirectional_iterator
 
 template <class T, class U>
 inline
-bool
+bool TEST_CONSTEXPR_CXX14
 operator==(const bidirectional_iterator<T>& x, const bidirectional_iterator<U>& y)
 {
     return x.base() == y.base();
@@ -204,7 +204,7 @@ operator==(const bidirectional_iterator<T>& x, const bidirectional_iterator<U>&
 
 template <class T, class U>
 inline
-bool
+bool TEST_CONSTEXPR_CXX14
 operator!=(const bidirectional_iterator<T>& x, const bidirectional_iterator<U>& y)
 {
     return !(x == y);
@@ -223,34 +223,34 @@ class random_access_iterator
     typedef It                                                 pointer;
     typedef typename std::iterator_traits<It>::reference       reference;
 
-    It base() const {return it_;}
+    TEST_CONSTEXPR_CXX14 It base() const {return it_;}
 
-    random_access_iterator() : it_() {}
-    explicit random_access_iterator(It it) : it_(it) {}
-   template <class U>
-        random_access_iterator(const random_access_iterator<U>& u) :it_(u.it_) {}
+    TEST_CONSTEXPR_CXX14 random_access_iterator() : it_() {}
+    explicit TEST_CONSTEXPR_CXX14 random_access_iterator(It it) : it_(it) {}
+    template <class U>
+        TEST_CONSTEXPR_CXX14 random_access_iterator(const random_access_iterator<U>& u) :it_(u.it_) {}
 
-    reference operator*() const {return *it_;}
-    pointer operator->() const {return it_;}
+    TEST_CONSTEXPR_CXX14 reference operator*() const {return *it_;}
+    TEST_CONSTEXPR_CXX14 pointer operator->() const {return it_;}
 
-    random_access_iterator& operator++() {++it_; return *this;}
-    random_access_iterator operator++(int)
+    TEST_CONSTEXPR_CXX14 random_access_iterator& operator++() {++it_; return *this;}
+    TEST_CONSTEXPR_CXX14 random_access_iterator operator++(int)
         {random_access_iterator tmp(*this); ++(*this); return tmp;}
 
-    random_access_iterator& operator--() {--it_; return *this;}
-    random_access_iterator operator--(int)
+    TEST_CONSTEXPR_CXX14 random_access_iterator& operator--() {--it_; return *this;}
+    TEST_CONSTEXPR_CXX14 random_access_iterator operator--(int)
         {random_access_iterator tmp(*this); --(*this); return tmp;}
 
-    random_access_iterator& operator+=(difference_type n) {it_ += n; return *this;}
-    random_access_iterator operator+(difference_type n) const
+    TEST_CONSTEXPR_CXX14 random_access_iterator& operator+=(difference_type n) {it_ += n; return *this;}
+    TEST_CONSTEXPR_CXX14 random_access_iterator operator+(difference_type n) const
         {random_access_iterator tmp(*this); tmp += n; return tmp;}
-    friend random_access_iterator operator+(difference_type n, random_access_iterator x)
+    friend TEST_CONSTEXPR_CXX14 random_access_iterator operator+(difference_type n, random_access_iterator x)
         {x += n; return x;}
-    random_access_iterator& operator-=(difference_type n) {return *this += -n;}
-    random_access_iterator operator-(difference_type n) const
+    TEST_CONSTEXPR_CXX14 random_access_iterator& operator-=(difference_type n) {return *this += -n;}
+    TEST_CONSTEXPR_CXX14 random_access_iterator operator-(difference_type n) const
         {random_access_iterator tmp(*this); tmp -= n; return tmp;}
 
-    reference operator[](difference_type n) const {return it_[n];}
+    TEST_CONSTEXPR_CXX14 reference operator[](difference_type n) const {return it_[n];}
 
     template <class T>
     void operator,(T const &) DELETE_FUNCTION;
@@ -258,7 +258,7 @@ class random_access_iterator
 
 template <class T, class U>
 inline
-bool
+bool TEST_CONSTEXPR_CXX14
 operator==(const random_access_iterator<T>& x, const random_access_iterator<U>& y)
 {
     return x.base() == y.base();
@@ -266,7 +266,7 @@ operator==(const random_access_iterator<T>& x, const random_access_iterator<U>&
 
 template <class T, class U>
 inline
-bool
+bool TEST_CONSTEXPR_CXX14
 operator!=(const random_access_iterator<T>& x, const random_access_iterator<U>& y)
 {
     return !(x == y);
@@ -274,7 +274,7 @@ operator!=(const random_access_iterator<T>& x, const random_access_iterator<U>&
 
 template <class T, class U>
 inline
-bool
+bool TEST_CONSTEXPR_CXX14
 operator<(const random_access_iterator<T>& x, const random_access_iterator<U>& y)
 {
     return x.base() < y.base();
@@ -282,7 +282,7 @@ operator<(const random_access_iterator<T>& x, const random_access_iterator<U>& y
 
 template <class T, class U>
 inline
-bool
+bool TEST_CONSTEXPR_CXX14
 operator<=(const random_access_iterator<T>& x, const random_access_iterator<U>& y)
 {
     return !(y < x);
@@ -290,7 +290,7 @@ operator<=(const random_access_iterator<T>& x, const random_access_iterator<U>&
 
 template <class T, class U>
 inline
-bool
+bool TEST_CONSTEXPR_CXX14
 operator>(const random_access_iterator<T>& x, const random_access_iterator<U>& y)
 {
     return y < x;
@@ -298,14 +298,14 @@ operator>(const random_access_iterator<T>& x, const random_access_iterator<U>& y
 
 template <class T, class U>
 inline
-bool
+bool TEST_CONSTEXPR_CXX14
 operator>=(const random_access_iterator<T>& x, const random_access_iterator<U>& y)
 {
     return !(x < y);
 }
 
 template <class T, class U>
-inline
+inline TEST_CONSTEXPR_CXX14
 typename std::iterator_traits<T>::difference_type
 operator-(const random_access_iterator<T>& x, const random_access_iterator<U>& y)
 {
@@ -313,22 +313,22 @@ operator-(const random_access_iterator<T>& x, const random_access_iterator<U>& y
 }
 
 template <class Iter>
-inline Iter base(output_iterator<Iter> i) { return i.base(); }
+inline TEST_CONSTEXPR_CXX14 Iter base(output_iterator<Iter> i) { return i.base(); }
 
 template <class Iter>
-inline Iter base(input_iterator<Iter> i) { return i.base(); }
+inline TEST_CONSTEXPR_CXX14 Iter base(input_iterator<Iter> i) { return i.base(); }
 
 template <class Iter>
-inline Iter base(forward_iterator<Iter> i) { return i.base(); }
+inline TEST_CONSTEXPR_CXX14 Iter base(forward_iterator<Iter> i) { return i.base(); }
 
 template <class Iter>
-inline Iter base(bidirectional_iterator<Iter> i) { return i.base(); }
+inline TEST_CONSTEXPR_CXX14 Iter base(bidirectional_iterator<Iter> i) { return i.base(); }
 
 template <class Iter>
-inline Iter base(random_access_iterator<Iter> i) { return i.base(); }
+inline TEST_CONSTEXPR_CXX14 Iter base(random_access_iterator<Iter> i) { return i.base(); }
 
 template <class Iter>    // everything else
-inline Iter base(Iter i) { return i; }
+inline TEST_CONSTEXPR_CXX14 Iter base(Iter i) { return i; }
 
 template <typename T>
 struct ThrowingIterator {

From 2dcc0c5ee66570f02392d1fbf29f573fa47258f8 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 17 May 2017 20:23:13 +0000
Subject: [PATCH 8/9] Vendor import of lld trunk r303291:
 https://llvm.org/svn/llvm-project/lld/trunk@303291

---
 ELF/Relocations.cpp |  30 ++++-------
 ELF/Relocations.h   |   2 +-
 ELF/Thunks.cpp      | 128 +++++++++++++++++++-------------------------
 ELF/Thunks.h        |   2 +-
 ELF/Writer.cpp      |   2 +-
 5 files changed, 68 insertions(+), 96 deletions(-)

diff --git a/ELF/Relocations.cpp b/ELF/Relocations.cpp
index ea7477e03842..c505a14f3c64 100644
--- a/ELF/Relocations.cpp
+++ b/ELF/Relocations.cpp
@@ -963,9 +963,8 @@ template <class ELFT> void elf::scanRelocations(InputSectionBase &S) {
 // in the Sections vector, and recalculate the InputSection output section
 // offsets.
 // This may invalidate any output section offsets stored outside of InputSection
-template <class ELFT>
-void ThunkCreator<ELFT>::mergeThunks(OutputSection *OS,
-                                     std::vector<ThunkSection *> &Thunks) {
+void ThunkCreator::mergeThunks(OutputSection *OS,
+                               std::vector<ThunkSection *> &Thunks) {
   // Order Thunks in ascending OutSecOff
   auto ThunkCmp = [](const ThunkSection *A, const ThunkSection *B) {
     return A->OutSecOff < B->OutSecOff;
@@ -993,9 +992,8 @@ void ThunkCreator<ELFT>::mergeThunks(OutputSection *OS,
   OS->assignOffsets();
 }
 
-template <class ELFT>
-ThunkSection *ThunkCreator<ELFT>::getOSThunkSec(ThunkSection *&TS,
-                                                OutputSection *OS) {
+ThunkSection *ThunkCreator::getOSThunkSec(ThunkSection *&TS,
+                                          OutputSection *OS) {
   if (TS == nullptr) {
     uint32_t Off = 0;
     for (auto *IS : OS->Sections) {
@@ -1009,9 +1007,7 @@ ThunkSection *ThunkCreator<ELFT>::getOSThunkSec(ThunkSection *&TS,
   return TS;
 }
 
-template <class ELFT>
-ThunkSection *ThunkCreator<ELFT>::getISThunkSec(InputSection *IS,
-                                                OutputSection *OS) {
+ThunkSection *ThunkCreator::getISThunkSec(InputSection *IS, OutputSection *OS) {
   ThunkSection *TS = ThunkedSections.lookup(IS);
   if (TS)
     return TS;
@@ -1022,12 +1018,11 @@ ThunkSection *ThunkCreator<ELFT>::getISThunkSec(InputSection *IS,
   return TS;
 }
 
-template <class ELFT>
-std::pair<Thunk *, bool> ThunkCreator<ELFT>::getThunk(SymbolBody &Body,
-                                                      uint32_t Type) {
+std::pair<Thunk *, bool> ThunkCreator::getThunk(SymbolBody &Body,
+                                                uint32_t Type) {
   auto res = ThunkedSymbols.insert({&Body, nullptr});
   if (res.second)
-    res.first->second = addThunk<ELFT>(Type, Body);
+    res.first->second = addThunk(Type, Body);
   return std::make_pair(res.first->second, res.second);
 }
 
@@ -1041,9 +1036,7 @@ std::pair<Thunk *, bool> ThunkCreator<ELFT>::getThunk(SymbolBody &Body,
 //
 // FIXME: All Thunks are assumed to be in range of the relocation. Range
 // extension Thunks are not yet supported.
-template <class ELFT>
-bool ThunkCreator<ELFT>::createThunks(
-    ArrayRef<OutputSection *> OutputSections) {
+bool ThunkCreator::createThunks(ArrayRef<OutputSection *> OutputSections) {
   // Create all the Thunks and insert them into synthetic ThunkSections. The
   // ThunkSections are later inserted back into the OutputSection.
 
@@ -1086,8 +1079,3 @@ template void elf::scanRelocations<ELF32LE>(InputSectionBase &);
 template void elf::scanRelocations<ELF32BE>(InputSectionBase &);
 template void elf::scanRelocations<ELF64LE>(InputSectionBase &);
 template void elf::scanRelocations<ELF64BE>(InputSectionBase &);
-
-template class elf::ThunkCreator<ELF32LE>;
-template class elf::ThunkCreator<ELF32BE>;
-template class elf::ThunkCreator<ELF64LE>;
-template class elf::ThunkCreator<ELF64BE>;
diff --git a/ELF/Relocations.h b/ELF/Relocations.h
index f8f0f11e14a9..f3512e0a89fc 100644
--- a/ELF/Relocations.h
+++ b/ELF/Relocations.h
@@ -119,7 +119,7 @@ template <class ELFT> void scanRelocations(InputSectionBase &);
 class ThunkSection;
 class Thunk;
 
-template <class ELFT> class ThunkCreator {
+class ThunkCreator {
 public:
   // Return true if Thunks have been added to OutputSections
   bool createThunks(ArrayRef<OutputSection *> OutputSections);
diff --git a/ELF/Thunks.cpp b/ELF/Thunks.cpp
index 80ea69663c01..da2b13677513 100644
--- a/ELF/Thunks.cpp
+++ b/ELF/Thunks.cpp
@@ -50,7 +50,7 @@ namespace {
 
 // Specific ARM Thunk implementations. The naming convention is:
 // Source State, TargetState, Target Requirement, ABS or PI, Range
-template <class ELFT> class ARMV7ABSLongThunk final : public Thunk {
+class ARMV7ABSLongThunk final : public Thunk {
 public:
   ARMV7ABSLongThunk(const SymbolBody &Dest) : Thunk(Dest) {}
 
@@ -59,7 +59,7 @@ template <class ELFT> class ARMV7ABSLongThunk final : public Thunk {
   void addSymbols(ThunkSection &IS) override;
 };
 
-template <class ELFT> class ARMV7PILongThunk final : public Thunk {
+class ARMV7PILongThunk final : public Thunk {
 public:
   ARMV7PILongThunk(const SymbolBody &Dest) : Thunk(Dest) {}
 
@@ -68,7 +68,7 @@ template <class ELFT> class ARMV7PILongThunk final : public Thunk {
   void addSymbols(ThunkSection &IS) override;
 };
 
-template <class ELFT> class ThumbV7ABSLongThunk final : public Thunk {
+class ThumbV7ABSLongThunk final : public Thunk {
 public:
   ThumbV7ABSLongThunk(const SymbolBody &Dest) : Thunk(Dest) {
     this->alignment = 2;
@@ -79,7 +79,7 @@ template <class ELFT> class ThumbV7ABSLongThunk final : public Thunk {
   void addSymbols(ThunkSection &IS) override;
 };
 
-template <class ELFT> class ThumbV7PILongThunk final : public Thunk {
+class ThumbV7PILongThunk final : public Thunk {
 public:
   ThumbV7PILongThunk(const SymbolBody &Dest) : Thunk(Dest) {
     this->alignment = 2;
@@ -91,7 +91,7 @@ template <class ELFT> class ThumbV7PILongThunk final : public Thunk {
 };
 
 // MIPS LA25 thunk
-template <class ELFT> class MipsThunk final : public Thunk {
+class MipsThunk final : public Thunk {
 public:
   MipsThunk(const SymbolBody &Dest) : Thunk(Dest) {}
 
@@ -109,117 +109,105 @@ static uint64_t getARMThunkDestVA(const SymbolBody &S) {
   return SignExtend64<32>(V);
 }
 
-template <class ELFT>
-void ARMV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &IS) const {
+void ARMV7ABSLongThunk::writeTo(uint8_t *Buf, ThunkSection &IS) const {
   const uint8_t Data[] = {
       0x00, 0xc0, 0x00, 0xe3, // movw         ip,:lower16:S
       0x00, 0xc0, 0x40, 0xe3, // movt         ip,:upper16:S
       0x1c, 0xff, 0x2f, 0xe1, // bx   ip
   };
-  uint64_t S = getARMThunkDestVA(this->Destination);
+  uint64_t S = getARMThunkDestVA(Destination); // address the thunk jumps to
   memcpy(Buf, Data, sizeof(Data));
   Target->relocateOne(Buf, R_ARM_MOVW_ABS_NC, S);
   Target->relocateOne(Buf + 4, R_ARM_MOVT_ABS, S);
 }
 
-template <class ELFT>
-void ARMV7ABSLongThunk<ELFT>::addSymbols(ThunkSection &IS) {
-  this->ThunkSym = addSyntheticLocal(
-      Saver.save("__ARMv7ABSLongThunk_" + this->Destination.getName()),
-      STT_FUNC, this->Offset, size(), &IS);
-  addSyntheticLocal("$a", STT_NOTYPE, this->Offset, 0, &IS);
+void ARMV7ABSLongThunk::addSymbols(ThunkSection &IS) {
+  // A local function symbol names the thunk; "$a" marks it as ARM code.
+  auto Name = Saver.save("__ARMv7ABSLongThunk_" + Destination.getName());
+  ThunkSym = addSyntheticLocal(Name, STT_FUNC, Offset, size(), &IS);
+  addSyntheticLocal("$a", STT_NOTYPE, Offset, 0, &IS);
 }
 
-template <class ELFT>
-void ThumbV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &IS) const {
+void ThumbV7ABSLongThunk::writeTo(uint8_t *Buf, ThunkSection &IS) const {
   const uint8_t Data[] = {
       0x40, 0xf2, 0x00, 0x0c, // movw         ip, :lower16:S
       0xc0, 0xf2, 0x00, 0x0c, // movt         ip, :upper16:S
       0x60, 0x47,             // bx   ip
   };
-  uint64_t S = getARMThunkDestVA(this->Destination);
+  uint64_t S = getARMThunkDestVA(Destination); // address the thunk jumps to
   memcpy(Buf, Data, sizeof(Data));
   Target->relocateOne(Buf, R_ARM_THM_MOVW_ABS_NC, S);
   Target->relocateOne(Buf + 4, R_ARM_THM_MOVT_ABS, S);
 }
 
-template <class ELFT>
-void ThumbV7ABSLongThunk<ELFT>::addSymbols(ThunkSection &IS) {
-  this->ThunkSym = addSyntheticLocal(
-      Saver.save("__Thumbv7ABSLongThunk_" + this->Destination.getName()),
-      STT_FUNC, this->Offset, size(), &IS);
-  addSyntheticLocal("$t", STT_NOTYPE, this->Offset, 0, &IS);
+void ThumbV7ABSLongThunk::addSymbols(ThunkSection &IS) {
+  // A local function symbol names the thunk; "$t" marks it as Thumb code.
+  auto Name = Saver.save("__Thumbv7ABSLongThunk_" + Destination.getName());
+  ThunkSym = addSyntheticLocal(Name, STT_FUNC, Offset, size(), &IS);
+  addSyntheticLocal("$t", STT_NOTYPE, Offset, 0, &IS);
 }
 
-template <class ELFT>
-void ARMV7PILongThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &IS) const {
+void ARMV7PILongThunk::writeTo(uint8_t *Buf, ThunkSection &IS) const {
   const uint8_t Data[] = {
       0xf0, 0xcf, 0x0f, 0xe3, // P:  movw ip,:lower16:S - (P + (L1-P) +8)
       0x00, 0xc0, 0x40, 0xe3, //     movt ip,:upper16:S - (P + (L1-P+4) +8)
       0x0f, 0xc0, 0x8c, 0xe0, // L1: add ip, ip, pc
       0x1c, 0xff, 0x2f, 0xe1, //     bx r12
   };
-  uint64_t S = getARMThunkDestVA(this->Destination);
-  uint64_t P = this->ThunkSym->getVA();
+  uint64_t S = getARMThunkDestVA(Destination); // address the thunk jumps to
+  uint64_t P = ThunkSym->getVA(); // address of this thunk's own symbol
   memcpy(Buf, Data, sizeof(Data));
   Target->relocateOne(Buf, R_ARM_MOVW_PREL_NC, S - P - 16);
   Target->relocateOne(Buf + 4, R_ARM_MOVT_PREL, S - P - 12);
 }
 
-template <class ELFT>
-void ARMV7PILongThunk<ELFT>::addSymbols(ThunkSection &IS) {
-  this->ThunkSym = addSyntheticLocal(
-      Saver.save("__ARMV7PILongThunk_" + this->Destination.getName()), STT_FUNC,
-      this->Offset, size(), &IS);
-  addSyntheticLocal("$a", STT_NOTYPE, this->Offset, 0, &IS);
+void ARMV7PILongThunk::addSymbols(ThunkSection &IS) {
+  // A local function symbol names the thunk; "$a" marks it as ARM code.
+  auto Name = Saver.save("__ARMV7PILongThunk_" + Destination.getName());
+  ThunkSym = addSyntheticLocal(Name, STT_FUNC, Offset, size(), &IS);
+  addSyntheticLocal("$a", STT_NOTYPE, Offset, 0, &IS);
 }
 
-template <class ELFT>
-void ThumbV7PILongThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &IS) const {
+void ThumbV7PILongThunk::writeTo(uint8_t *Buf, ThunkSection &IS) const {
   const uint8_t Data[] = {
       0x4f, 0xf6, 0xf4, 0x7c, // P:  movw ip,:lower16:S - (P + (L1-P) + 4)
       0xc0, 0xf2, 0x00, 0x0c, //     movt ip,:upper16:S - (P + (L1-P+4) + 4)
       0xfc, 0x44,             // L1: add  r12, pc
       0x60, 0x47,             //     bx   r12
   };
-  uint64_t S = getARMThunkDestVA(this->Destination);
-  uint64_t P = this->ThunkSym->getVA();
+  uint64_t S = getARMThunkDestVA(Destination); // address the thunk jumps to
+  uint64_t P = ThunkSym->getVA(); // address of this thunk's own symbol
   memcpy(Buf, Data, sizeof(Data));
   Target->relocateOne(Buf, R_ARM_THM_MOVW_PREL_NC, S - P - 12);
   Target->relocateOne(Buf + 4, R_ARM_THM_MOVT_PREL, S - P - 8);
 }
 
-template <class ELFT>
-void ThumbV7PILongThunk<ELFT>::addSymbols(ThunkSection &IS) {
-  this->ThunkSym = addSyntheticLocal(
-      Saver.save("__ThumbV7PILongThunk_" + this->Destination.getName()),
-      STT_FUNC, this->Offset, size(), &IS);
-  addSyntheticLocal("$t", STT_NOTYPE, this->Offset, 0, &IS);
+void ThumbV7PILongThunk::addSymbols(ThunkSection &IS) {
+  // A local function symbol names the thunk; "$t" marks it as Thumb code.
+  auto Name = Saver.save("__ThumbV7PILongThunk_" + Destination.getName());
+  ThunkSym = addSyntheticLocal(Name, STT_FUNC, Offset, size(), &IS);
+  addSyntheticLocal("$t", STT_NOTYPE, Offset, 0, &IS);
 }
 
 // Write MIPS LA25 thunk code to call PIC function from the non-PIC one.
-template <class ELFT>
-void MipsThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &) const {
-  const endianness E = ELFT::TargetEndianness;
-
+void MipsThunk::writeTo(uint8_t *Buf, ThunkSection &) const {
   uint64_t S = this->Destination.getVA();
-  write32<E>(Buf, 0x3c190000);                // lui   $25, %hi(func)
-  write32<E>(Buf + 4, 0x08000000 | (S >> 2)); // j     func
-  write32<E>(Buf + 8, 0x27390000);            // addiu $25, $25, %lo(func)
-  write32<E>(Buf + 12, 0x00000000);           // nop
+  write32(Buf, 0x3c190000, Config->Endianness); // lui   $25, %hi(func)
+  write32(Buf + 4, 0x08000000 | (S >> 2), Config->Endianness); // j     func
+  write32(Buf + 8, 0x27390000, Config->Endianness); // addiu $25, $25, %lo(func)
+  write32(Buf + 12, 0x00000000, Config->Endianness); // nop
   Target->relocateOne(Buf, R_MIPS_HI16, S);
   Target->relocateOne(Buf + 8, R_MIPS_LO16, S);
 }
 
-template <class ELFT> void MipsThunk<ELFT>::addSymbols(ThunkSection &IS) {
-  this->ThunkSym = addSyntheticLocal(
-      Saver.save("__LA25Thunk_" + this->Destination.getName()), STT_FUNC,
-      this->Offset, size(), &IS);
+void MipsThunk::addSymbols(ThunkSection &IS) {
+  // One local STT_FUNC symbol, named after the destination, marks the thunk.
+  auto Name = Saver.save("__LA25Thunk_" + Destination.getName());
+  ThunkSym = addSyntheticLocal(Name, STT_FUNC, Offset, size(), &IS);
 }
 
-template <class ELFT>
-InputSection *MipsThunk<ELFT>::getTargetInputSection() const {
-  auto *DR = dyn_cast<DefinedRegular>(&this->Destination);
+InputSection *MipsThunk::getTargetInputSection() const {
+  auto *DR = cast<DefinedRegular>(&Destination); // asserts type; used unchecked
   return dyn_cast<InputSection>(DR->Section);
 }
 
@@ -228,7 +216,7 @@ Thunk::Thunk(const SymbolBody &D) : Destination(D), Offset(0) {}
 Thunk::~Thunk() = default;
 
 // Creates a thunk for Thumb-ARM interworking.
-template <class ELFT> static Thunk *addThunkArm(uint32_t Reloc, SymbolBody &S) {
+static Thunk *addThunkArm(uint32_t Reloc, SymbolBody &S) {
   // ARM relocations need ARM to Thumb interworking Thunks.
   // Thumb relocations need Thumb to ARM relocations.
   // Use position independent Thunks if we require position independent code.
@@ -237,33 +225,29 @@ template <class ELFT> static Thunk *addThunkArm(uint32_t Reloc, SymbolBody &S) {
   case R_ARM_PLT32:
   case R_ARM_JUMP24:
     if (Config->Pic)
-      return make<ARMV7PILongThunk<ELFT>>(S);
-    return make<ARMV7ABSLongThunk<ELFT>>(S);
+      return make<ARMV7PILongThunk>(S);
+    return make<ARMV7ABSLongThunk>(S);
   case R_ARM_THM_JUMP19:
   case R_ARM_THM_JUMP24:
     if (Config->Pic)
-      return make<ThumbV7PILongThunk<ELFT>>(S);
-    return make<ThumbV7ABSLongThunk<ELFT>>(S);
+      return make<ThumbV7PILongThunk>(S);
+    return make<ThumbV7ABSLongThunk>(S);
   }
   fatal("unrecognized relocation type");
 }
 
-template <class ELFT> static Thunk *addThunkMips(SymbolBody &S) {
-  return make<MipsThunk<ELFT>>(S);
+static Thunk *addThunkMips(SymbolBody &S) {
+  return make<MipsThunk>(S); // the LA25 thunk is the only MIPS thunk here
 }
 
-template <class ELFT> Thunk *addThunk(uint32_t RelocType, SymbolBody &S) {
+Thunk *addThunk(uint32_t RelocType, SymbolBody &S) {
   if (Config->EMachine == EM_ARM)
-    return addThunkArm<ELFT>(RelocType, S);
+    return addThunkArm(RelocType, S); // thunk kind depends on the relocation
   else if (Config->EMachine == EM_MIPS)
-    return addThunkMips<ELFT>(S);
+    return addThunkMips(S); // RelocType is not consulted for MIPS
   llvm_unreachable("add Thunk only supported for ARM and Mips");
   return nullptr;
 }
 
-template Thunk *addThunk<ELF32LE>(uint32_t, SymbolBody &);
-template Thunk *addThunk<ELF32BE>(uint32_t, SymbolBody &);
-template Thunk *addThunk<ELF64LE>(uint32_t, SymbolBody &);
-template Thunk *addThunk<ELF64BE>(uint32_t, SymbolBody &);
 } // end namespace elf
 } // end namespace lld
diff --git a/ELF/Thunks.h b/ELF/Thunks.h
index a9f49279f3f2..38ee090e75e1 100644
--- a/ELF/Thunks.h
+++ b/ELF/Thunks.h
@@ -51,7 +51,7 @@ class Thunk {
 
 // For a Relocation to symbol S create a Thunk to be added to a synthetic
 // ThunkSection. At present there are implementations for ARM and Mips Thunks.
-template <class ELFT> Thunk *addThunk(uint32_t RelocType, SymbolBody &S);
+Thunk *addThunk(uint32_t RelocType, SymbolBody &S);
 
 } // namespace elf
 } // namespace lld
diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp
index 4cdfce76202c..4be6fe53c18b 100644
--- a/ELF/Writer.cpp
+++ b/ELF/Writer.cpp
@@ -1223,7 +1223,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
     // we need to assign addresses so that we can tell if jump instructions
     // are out of range. This will need to turn into a loop that converges
     // when no more Thunks are added
-    ThunkCreator<ELFT> TC;
+    ThunkCreator TC;
     if (TC.createThunks(OutputSections))
       applySynthetic({InX::MipsGot},
                      [](SyntheticSection *SS) { SS->updateAllocSize(); });

From 5a5de6ea3962782b02221b96b27dd064b25d381f Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 17 May 2017 20:23:22 +0000
Subject: [PATCH 9/9] Vendor import of lldb trunk r303291:
 https://llvm.org/svn/llvm-project/lldb/trunk@303291

---
 include/lldb/Symbol/SymbolContext.h           |  23 +++
 .../test/lang/c/conflicting-symbol/Makefile   |  18 ++
 .../test/lang/c/conflicting-symbol/One.mk     |  12 ++
 .../test/lang/c/conflicting-symbol/One/One.c  |   6 +
 .../test/lang/c/conflicting-symbol/One/One.h  |   4 +
 .../c/conflicting-symbol/One/OneConstant.c    |   1 +
 .../TestConflictingSymbol.py                  |  90 ++++++++++
 .../test/lang/c/conflicting-symbol/Two.mk     |  12 ++
 .../test/lang/c/conflicting-symbol/Two/Two.c  |   6 +
 .../test/lang/c/conflicting-symbol/Two/Two.h  |   4 +
 .../c/conflicting-symbol/Two/TwoConstant.c    |   1 +
 .../test/lang/c/conflicting-symbol/main.c     |  11 ++
 packages/Python/lldbsuite/test/lldbtest.py    |   2 +-
 source/Breakpoint/Breakpoint.cpp              |   4 +-
 .../Clang/ClangExpressionDeclMap.cpp          | 112 ++-----------
 .../Clang/ClangExpressionDeclMap.h            |  18 --
 source/Symbol/SymbolContext.cpp               | 157 ++++++++++++++++++
 17 files changed, 360 insertions(+), 121 deletions(-)
 create mode 100644 packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Makefile
 create mode 100644 packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One.mk
 create mode 100644 packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One/One.c
 create mode 100644 packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One/One.h
 create mode 100644 packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One/OneConstant.c
 create mode 100644 packages/Python/lldbsuite/test/lang/c/conflicting-symbol/TestConflictingSymbol.py
 create mode 100644 packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two.mk
 create mode 100644 packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two/Two.c
 create mode 100644 packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two/Two.h
 create mode 100644 packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two/TwoConstant.c
 create mode 100644 packages/Python/lldbsuite/test/lang/c/conflicting-symbol/main.c

diff --git a/include/lldb/Symbol/SymbolContext.h b/include/lldb/Symbol/SymbolContext.h
index e4dcc73bb52b..f84b7cf916fe 100644
--- a/include/lldb/Symbol/SymbolContext.h
+++ b/include/lldb/Symbol/SymbolContext.h
@@ -235,6 +235,29 @@ class SymbolContext {
 
   bool GetAddressRangeFromHereToEndLine(uint32_t end_line, AddressRange &range,
                                         Status &error);
+  
+  //------------------------------------------------------------------
+  /// Find the best global data symbol visible from this context.
+  ///
+  /// Symbol priority is (highest first):
+  ///     - extern symbol in the current module if there is one
+  ///     - non-extern symbol in the current module if there is one
+  ///     - extern symbol in the target
+  ///     - non-extern symbol in the target
+  /// It is an error if the highest-priority result is ambiguous.
+  ///
+  /// @param[in] name
+  ///     The name of the symbol to search for.
+  ///
+  /// @param[out] error
+  ///     Cleared on entry, then populated with a message if there was an
+  ///     ambiguous result.  The error will not be populated if no result
+  ///     was found.
+  ///
+  /// @return
+  ///     The symbol found, or \b nullptr if there is no match or no target.
+  //------------------------------------------------------------------
+  const Symbol *FindBestGlobalDataSymbol(const ConstString &name, Status &error);
 
   void GetDescription(Stream *s, lldb::DescriptionLevel level,
                       Target *target) const;
diff --git a/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Makefile b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Makefile
new file mode 100644
index 000000000000..f8a04bd32b90
--- /dev/null
+++ b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Makefile
@@ -0,0 +1,18 @@
+LEVEL := ../../../make
+
+LD_EXTRAS := -L. -l$(LIB_PREFIX)One -l$(LIB_PREFIX)Two
+C_SOURCES := main.c
+
+main.o : CFLAGS_EXTRAS += -g -O0
+
+include $(LEVEL)/Makefile.rules
+
+.PHONY:
+a.out: lib_One lib_Two
+
+lib_%:
+	$(MAKE) -f $*.mk
+
+clean::
+	$(MAKE) -f One.mk clean
+	$(MAKE) -f Two.mk clean
diff --git a/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One.mk b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One.mk
new file mode 100644
index 000000000000..04f894c595e8
--- /dev/null
+++ b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One.mk
@@ -0,0 +1,12 @@
+LEVEL := ../../../make
+
+DYLIB_NAME := One
+DYLIB_C_SOURCES := One/One.c One/OneConstant.c
+DYLIB_ONLY := YES
+
+include $(LEVEL)/Makefile.rules
+
+CFLAGS_EXTRAS += -fPIC
+
+One/OneConstant.o: One/OneConstant.c
+	$(CC) $(CFLAGS_NO_DEBUG) -c $< -o $@
diff --git a/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One/One.c b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One/One.c
new file mode 100644
index 000000000000..6bd729f65700
--- /dev/null
+++ b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One/One.c
@@ -0,0 +1,6 @@
+#include "One.h"
+#include <stdio.h>
+
+void one() {
+  printf("One\n"); // break here
+}
diff --git a/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One/One.h b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One/One.h
new file mode 100644
index 000000000000..b59f5ad13f22
--- /dev/null
+++ b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One/One.h
@@ -0,0 +1,4 @@
+#ifndef ONE_H
+#define ONE_H
+void one();
+#endif
diff --git a/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One/OneConstant.c b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One/OneConstant.c
new file mode 100644
index 000000000000..8255c2fce995
--- /dev/null
+++ b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/One/OneConstant.c
@@ -0,0 +1 @@
+int __attribute__ ((visibility("hidden"))) conflicting_symbol = 11111;
diff --git a/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/TestConflictingSymbol.py b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/TestConflictingSymbol.py
new file mode 100644
index 000000000000..d3327700bfd2
--- /dev/null
+++ b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/TestConflictingSymbol.py
@@ -0,0 +1,90 @@
+"""Test that conflicting symbols in different shared libraries work correctly"""
+
+from __future__ import print_function
+
+
+import os
+import time
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class TestConflictingSymbols(TestBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+    NO_DEBUG_INFO_TESTCASE = True
+
+    def test_conflicting_symbols(self):
+        self.build()
+        exe = os.path.join(os.getcwd(), "a.out")
+        target = self.dbg.CreateTarget(exe)
+        self.assertTrue(target, VALID_TARGET)
+
+        # Register our shared libraries for remote targets so they get
+        # automatically uploaded
+        environment = self.registerSharedLibrariesWithTarget(
+            target, ['One', 'Two'])
+
+        One_line = line_number('One/One.c', '// break here')
+        Two_line = line_number('Two/Two.c', '// break here')
+        main_line = line_number('main.c', '// break here')
+        lldbutil.run_break_set_command(
+            self, 'breakpoint set -f One.c -l %s' % (One_line))
+        lldbutil.run_break_set_command(
+            self, 'breakpoint set -f Two.c -l %s' % (Two_line))
+        lldbutil.run_break_set_by_file_and_line(
+            self, 'main.c', main_line, num_expected_locations=1, loc_exact=True)
+
+        process = target.LaunchSimple(
+            None, environment, self.get_process_working_directory())
+        self.assertTrue(process, PROCESS_IS_VALID)
+
+        # First stop, inside one(): the stop reason should be breakpoint.
+        self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT,
+                    substrs=['stopped',
+                             'stop reason = breakpoint'])
+
+        self.expect("breakpoint list -f", BREAKPOINT_HIT_ONCE,
+                    substrs=[' resolved, hit count = 1'])
+
+        # In library One, its own hidden conflicting_symbol should be found.
+        self.expect(
+            "expr (unsigned long long)conflicting_symbol",
+            "Symbol from One should be found",
+            substrs=[
+                "11111"])
+
+        self.runCmd("continue", RUN_SUCCEEDED)
+
+        # Second stop, inside two(): the stop reason should be breakpoint.
+        self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT,
+                    substrs=['stopped',
+                             'stop reason = breakpoint'])
+
+        self.expect("breakpoint list -f", BREAKPOINT_HIT_ONCE,
+                    substrs=[' resolved, hit count = 1'])
+
+        self.expect(
+            "expr (unsigned long long)conflicting_symbol",
+            "Symbol from Two should be found",
+            substrs=[
+                "22222"])
+
+        self.runCmd("continue", RUN_SUCCEEDED)
+
+        # Third stop, inside main(): the stop reason should be breakpoint.
+        self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT,
+                    substrs=['stopped',
+                             'stop reason = breakpoint'])
+
+        self.expect("breakpoint list -f", BREAKPOINT_HIT_ONCE,
+                    substrs=[' resolved, hit count = 1'])
+
+        self.expect(
+            "expr (unsigned long long)conflicting_symbol",
+            "An error should be printed when symbols can't be ordered",
+            error=True,
+            substrs=[
+                "Multiple internal symbols"])
diff --git a/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two.mk b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two.mk
new file mode 100644
index 000000000000..117d9e00d443
--- /dev/null
+++ b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two.mk
@@ -0,0 +1,12 @@
+LEVEL := ../../../make
+
+DYLIB_NAME := Two
+DYLIB_C_SOURCES := Two/Two.c Two/TwoConstant.c
+DYLIB_ONLY := YES
+
+include $(LEVEL)/Makefile.rules
+
+CFLAGS_EXTRAS += -fPIC
+
+Two/TwoConstant.o: Two/TwoConstant.c
+	$(CC) $(CFLAGS_NO_DEBUG) -c $< -o $@
diff --git a/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two/Two.c b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two/Two.c
new file mode 100644
index 000000000000..8d8d668b8c31
--- /dev/null
+++ b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two/Two.c
@@ -0,0 +1,6 @@
+#include "Two.h"
+#include <stdio.h>
+
+void two() {
+  printf("Two\n"); // break here
+}
diff --git a/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two/Two.h b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two/Two.h
new file mode 100644
index 000000000000..8d5bd6a32330
--- /dev/null
+++ b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two/Two.h
@@ -0,0 +1,4 @@
+#ifndef TWO_H
+#define TWO_H
+void two();
+#endif
diff --git a/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two/TwoConstant.c b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two/TwoConstant.c
new file mode 100644
index 000000000000..9fc7c4b79515
--- /dev/null
+++ b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/Two/TwoConstant.c
@@ -0,0 +1 @@
+int __attribute__ ((visibility("hidden"))) conflicting_symbol = 22222;
diff --git a/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/main.c b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/main.c
new file mode 100644
index 000000000000..4dcd443c0492
--- /dev/null
+++ b/packages/Python/lldbsuite/test/lang/c/conflicting-symbol/main.c
@@ -0,0 +1,11 @@
+#include "One/One.h"
+#include "Two/Two.h"
+
+#include <stdio.h>
+
+int main() {
+  one();
+  two();
+  printf("main\n"); // break here
+  return(0); 
+}
diff --git a/packages/Python/lldbsuite/test/lldbtest.py b/packages/Python/lldbsuite/test/lldbtest.py
index bc0fb1b686ad..f1d65cf2076d 100644
--- a/packages/Python/lldbsuite/test/lldbtest.py
+++ b/packages/Python/lldbsuite/test/lldbtest.py
@@ -1934,7 +1934,7 @@ def registerSharedLibrariesWithTarget(self, target, shlibs):
             # "libFoo.dylib" or "libFoo.so", or "Foo.so" for "Foo.so" or "libFoo.so", or just a
             # basename like "libFoo.so". So figure out which one it is and resolve the local copy
             # of the shared library accordingly
-            if os.path.exists(name):
+            if os.path.isfile(name):
                 local_shlib_path = name  # name is the full path to the local shared library
             else:
                 # Check relative names
diff --git a/source/Breakpoint/Breakpoint.cpp b/source/Breakpoint/Breakpoint.cpp
index 4c58f8231344..17c104ba0c60 100644
--- a/source/Breakpoint/Breakpoint.cpp
+++ b/source/Breakpoint/Breakpoint.cpp
@@ -837,8 +837,8 @@ bool Breakpoint::AddName(llvm::StringRef new_name, Status &error) {
   if (new_name.empty())
     return false;
   if (!BreakpointID::StringIsBreakpointName(new_name, error)) {
-    error.SetErrorStringWithFormat("input name \"%s\" not a breakpoint name.",
-                                   new_name);
+    error.SetErrorStringWithFormatv("input name \"{0}\" not a breakpoint name.",
+                                    new_name);
     return false;
   }
   if (!error.Success())
diff --git a/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
index 256d46a15420..8fde41052192 100644
--- a/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
+++ b/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
@@ -591,103 +591,6 @@ addr_t ClangExpressionDeclMap::GetSymbolAddress(const ConstString &name,
                           symbol_type);
 }
 
-const Symbol *ClangExpressionDeclMap::FindGlobalDataSymbol(
-    Target &target, const ConstString &name, lldb_private::Module *module) {
-  SymbolContextList sc_list;
-
-  if (module)
-    module->FindSymbolsWithNameAndType(name, eSymbolTypeAny, sc_list);
-  else
-    target.GetImages().FindSymbolsWithNameAndType(name, eSymbolTypeAny,
-                                                  sc_list);
-
-  const uint32_t matches = sc_list.GetSize();
-  for (uint32_t i = 0; i < matches; ++i) {
-    SymbolContext sym_ctx;
-    sc_list.GetContextAtIndex(i, sym_ctx);
-    if (sym_ctx.symbol) {
-      const Symbol *symbol = sym_ctx.symbol;
-      const Address sym_address = symbol->GetAddress();
-
-      if (sym_address.IsValid()) {
-        switch (symbol->GetType()) {
-        case eSymbolTypeData:
-        case eSymbolTypeRuntime:
-        case eSymbolTypeAbsolute:
-        case eSymbolTypeObjCClass:
-        case eSymbolTypeObjCMetaClass:
-        case eSymbolTypeObjCIVar:
-          if (symbol->GetDemangledNameIsSynthesized()) {
-            // If the demangled name was synthesized, then don't use it
-            // for expressions. Only let the symbol match if the mangled
-            // named matches for these symbols.
-            if (symbol->GetMangled().GetMangledName() != name)
-              break;
-          }
-          return symbol;
-
-        case eSymbolTypeReExported: {
-          ConstString reexport_name = symbol->GetReExportedSymbolName();
-          if (reexport_name) {
-            ModuleSP reexport_module_sp;
-            ModuleSpec reexport_module_spec;
-            reexport_module_spec.GetPlatformFileSpec() =
-                symbol->GetReExportedSymbolSharedLibrary();
-            if (reexport_module_spec.GetPlatformFileSpec()) {
-              reexport_module_sp =
-                  target.GetImages().FindFirstModule(reexport_module_spec);
-              if (!reexport_module_sp) {
-                reexport_module_spec.GetPlatformFileSpec()
-                    .GetDirectory()
-                    .Clear();
-                reexport_module_sp =
-                    target.GetImages().FindFirstModule(reexport_module_spec);
-              }
-            }
-            // Don't allow us to try and resolve a re-exported symbol if it is
-            // the same
-            // as the current symbol
-            if (name == symbol->GetReExportedSymbolName() &&
-                module == reexport_module_sp.get())
-              return NULL;
-
-            return FindGlobalDataSymbol(target,
-                                        symbol->GetReExportedSymbolName(),
-                                        reexport_module_sp.get());
-          }
-        } break;
-
-        case eSymbolTypeCode: // We already lookup functions elsewhere
-        case eSymbolTypeVariable:
-        case eSymbolTypeLocal:
-        case eSymbolTypeParam:
-        case eSymbolTypeTrampoline:
-        case eSymbolTypeInvalid:
-        case eSymbolTypeException:
-        case eSymbolTypeSourceFile:
-        case eSymbolTypeHeaderFile:
-        case eSymbolTypeObjectFile:
-        case eSymbolTypeCommonBlock:
-        case eSymbolTypeBlock:
-        case eSymbolTypeVariableType:
-        case eSymbolTypeLineEntry:
-        case eSymbolTypeLineHeader:
-        case eSymbolTypeScopeBegin:
-        case eSymbolTypeScopeEnd:
-        case eSymbolTypeAdditional:
-        case eSymbolTypeCompiler:
-        case eSymbolTypeInstrumentation:
-        case eSymbolTypeUndefined:
-        case eSymbolTypeResolver:
-          break;
-        }
-      }
-    }
-  }
-
-  return NULL;
-}
-
 lldb::VariableSP ClangExpressionDeclMap::FindGlobalVariable(
     Target &target, ModuleSP &module, const ConstString &name,
     CompilerDeclContext *namespace_decl, TypeFromUser *type) {
@@ -1526,9 +1429,18 @@ void ClangExpressionDeclMap::FindExternalVisibleDecls(
       // We couldn't find a non-symbol variable for this.  Now we'll hunt for
       // a generic
       // data symbol, and -- if it is found -- treat it as a variable.
-
-      const Symbol *data_symbol = FindGlobalDataSymbol(*target, name);
-
+      Status error;
+      
+      const Symbol *data_symbol =
+          m_parser_vars->m_sym_ctx.FindBestGlobalDataSymbol(name, error);
+      
+      if (!error.Success()) {
+        const unsigned diag_id =
+            m_ast_context->getDiagnostics().getCustomDiagID(
+                clang::DiagnosticsEngine::Level::Error, "%0");
+        m_ast_context->getDiagnostics().Report(diag_id) << error.AsCString();
+      }
+                                          
       if (data_symbol) {
         std::string warning("got name from symbols: ");
         warning.append(name.AsCString());
diff --git a/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h b/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h
index ac88c1d6b891..e8a9ba6862db 100644
--- a/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h
+++ b/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h
@@ -447,24 +447,6 @@ class ClangExpressionDeclMap : public ClangASTSource {
   //----------------------------------------------------------------------
   uint64_t GetParserID() { return (uint64_t) this; }
 
-  //------------------------------------------------------------------
-  /// Given a target, find a data symbol that has the given name.
-  ///
-  /// @param[in] target
-  ///     The target to use as the basis for the search.
-  ///
-  /// @param[in] name
-  ///     The name as a plain C string.
-  ///
-  /// @param[in] module
-  ///     The module to limit the search to. This can be NULL
-  ///
-  /// @return
-  ///     The LLDB Symbol found, or NULL if none was found.
-  //------------------------------------------------------------------
-  const Symbol *FindGlobalDataSymbol(Target &target, const ConstString &name,
-                                     Module *module = NULL);
-
   //------------------------------------------------------------------
   /// Given a target, find a variable that matches the given name and
   /// type.
diff --git a/source/Symbol/SymbolContext.cpp b/source/Symbol/SymbolContext.cpp
index 5ea6f91200c1..4ac35010c74c 100644
--- a/source/Symbol/SymbolContext.cpp
+++ b/source/Symbol/SymbolContext.cpp
@@ -799,6 +799,163 @@ bool SymbolContext::GetAddressRangeFromHereToEndLine(uint32_t end_line,
   return true;
 }
 
+// Finds the best data symbol called 'name': this context's module (if any)
+// is searched first, then every image of the target; within one batch of
+// matches an external symbol beats an internal one. Returns nullptr with
+// 'error' set when several equally ranked candidates exist, and nullptr with
+// 'error' clear when nothing matched or this context has no target.
+const Symbol *
+SymbolContext::FindBestGlobalDataSymbol(const ConstString &name, Status &error) {
+  error.Clear();
+  if (!target_sp)
+    return nullptr;
+
+  Target &target = *target_sp;
+  Module *module = module_sp.get();
+
+  // Picks the winner from one list of matches, or reports an ambiguity.
+  auto ProcessMatches = [this, &name, &target, module]
+  (SymbolContextList &sc_list, Status &error) -> const Symbol* {
+    llvm::SmallVector<const Symbol *, 1> external_symbols;
+    llvm::SmallVector<const Symbol *, 1> internal_symbols;
+    const uint32_t matches = sc_list.GetSize();
+    for (uint32_t i = 0; i < matches; ++i) {
+      SymbolContext sym_ctx;
+      sc_list.GetContextAtIndex(i, sym_ctx);
+      if (sym_ctx.symbol) {
+        const Symbol *symbol = sym_ctx.symbol;
+        const Address sym_address = symbol->GetAddress();
+
+        if (sym_address.IsValid()) {
+          switch (symbol->GetType()) {
+            case eSymbolTypeData:
+            case eSymbolTypeRuntime:
+            case eSymbolTypeAbsolute:
+            case eSymbolTypeObjCClass:
+            case eSymbolTypeObjCMetaClass:
+            case eSymbolTypeObjCIVar:
+              // If the demangled name was synthesized, then don't use it
+              // for expressions. Only let the symbol match if the mangled
+              // name matches for these symbols.
+              if (symbol->GetDemangledNameIsSynthesized() &&
+                  symbol->GetMangled().GetMangledName() != name)
+                break;
+              if (symbol->IsExternal())
+                external_symbols.push_back(symbol);
+              else
+                internal_symbols.push_back(symbol);
+              break;
+            case eSymbolTypeReExported: {
+              ConstString reexport_name = symbol->GetReExportedSymbolName();
+              if (reexport_name) {
+                ModuleSP reexport_module_sp;
+                ModuleSpec reexport_module_spec;
+                reexport_module_spec.GetPlatformFileSpec() =
+                    symbol->GetReExportedSymbolSharedLibrary();
+                if (reexport_module_spec.GetPlatformFileSpec()) {
+                  reexport_module_sp =
+                      target.GetImages().FindFirstModule(reexport_module_spec);
+                  if (!reexport_module_sp) {
+                    reexport_module_spec.GetPlatformFileSpec()
+                        .GetDirectory()
+                        .Clear();
+                    reexport_module_sp = target.GetImages().FindFirstModule(
+                        reexport_module_spec);
+                  }
+                }
+                // Don't allow us to try and resolve a re-exported symbol if
+                // it is the same as the current symbol
+                if (name == reexport_name && module == reexport_module_sp.get())
+                  return nullptr;
+                // NOTE(review): the follow-up lookup runs in this context;
+                // reexport_module_sp only feeds the self-reference check above.
+                return FindBestGlobalDataSymbol(reexport_name, error);
+              }
+            } break;
+
+            case eSymbolTypeCode: // We already lookup functions elsewhere
+            case eSymbolTypeVariable:
+            case eSymbolTypeLocal:
+            case eSymbolTypeParam:
+            case eSymbolTypeTrampoline:
+            case eSymbolTypeInvalid:
+            case eSymbolTypeException:
+            case eSymbolTypeSourceFile:
+            case eSymbolTypeHeaderFile:
+            case eSymbolTypeObjectFile:
+            case eSymbolTypeCommonBlock:
+            case eSymbolTypeBlock:
+            case eSymbolTypeVariableType:
+            case eSymbolTypeLineEntry:
+            case eSymbolTypeLineHeader:
+            case eSymbolTypeScopeBegin:
+            case eSymbolTypeScopeEnd:
+            case eSymbolTypeAdditional:
+            case eSymbolTypeCompiler:
+            case eSymbolTypeInstrumentation:
+            case eSymbolTypeUndefined:
+            case eSymbolTypeResolver:
+              break;
+          }
+        }
+      }
+    }
+
+    if (external_symbols.size() > 1) {
+      StreamString ss;
+      ss.Printf("Multiple external symbols found for '%s'\n", name.AsCString());
+      for (const Symbol *symbol : external_symbols) {
+        symbol->GetDescription(&ss, eDescriptionLevelFull, &target);
+        // One candidate per line, as in the internal-symbol report below.
+        ss.PutChar('\n');
+      }
+      error.SetErrorString(ss.GetData());
+      return nullptr;
+    } else if (external_symbols.size()) {
+      return external_symbols[0];
+    } else if (internal_symbols.size() > 1) {
+      StreamString ss;
+      ss.Printf("Multiple internal symbols found for '%s'\n", name.AsCString());
+      for (const Symbol *symbol : internal_symbols) {
+        symbol->GetDescription(&ss, eDescriptionLevelVerbose, &target);
+        ss.PutChar('\n');
+      }
+      error.SetErrorString(ss.GetData());
+      return nullptr;
+    } else if (internal_symbols.size()) {
+      return internal_symbols[0];
+    } else {
+      return nullptr;
+    }
+  };
+
+  // First choice: symbols from this context's own module.
+  if (module) {
+    SymbolContextList sc_list;
+    module->FindSymbolsWithNameAndType(name, eSymbolTypeAny, sc_list);
+    const Symbol *const module_symbol = ProcessMatches(sc_list, error);
+    if (!error.Success())
+      return nullptr;
+    if (module_symbol)
+      return module_symbol;
+  }
+
+  // Otherwise fall back to every image of the target.
+  {
+    SymbolContextList sc_list;
+    target.GetImages().FindSymbolsWithNameAndType(name, eSymbolTypeAny,
+                                                  sc_list);
+    const Symbol *const target_symbol = ProcessMatches(sc_list, error);
+    if (!error.Success())
+      return nullptr;
+    if (target_symbol)
+      return target_symbol;
+  }
+
+  return nullptr; // no error; we just didn't find anything
+}
+
+
 //----------------------------------------------------------------------
 //
 //  SymbolContextSpecifier