Vendor import of llvm trunk r300890:

https://llvm.org/svn/llvm-project/llvm/trunk@300890
Dimitry Andric 2017-04-20 21:19:10 +00:00
parent 71d5a2540a
commit d99dafe2e4
433 changed files with 36829 additions and 11114 deletions

View File

@ -512,6 +512,9 @@ set(LLVM_INSTALL_OCAMLDOC_HTML_DIR "share/doc/llvm/ocaml-html"
option (LLVM_BUILD_EXTERNAL_COMPILER_RT
"Build compiler-rt as an external project." OFF)
option (LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
"Show target and host info when tools are invoked with --version." ON)
# You can configure which libraries from LLVM you want to include in the
# shared library by setting LLVM_DYLIB_COMPONENTS to a semi-colon delimited
# list of LLVM components. All component names handled by llvm-config are valid.

View File

@ -19,8 +19,6 @@
using namespace llvm;
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(DIBuilder, LLVMDIBuilderRef)
LLVMDIBuilderRef LLVMNewDIBuilder(LLVMModuleRef mref) {
Module *m = unwrap(mref);
return wrap(new DIBuilder(*m));

View File

@ -26,7 +26,6 @@
extern "C" {
#endif
typedef struct LLVMOpaqueMetadata *LLVMMetadataRef;
struct LLVMDebugLocMetadata{
unsigned Line;
unsigned Col;
@ -59,16 +58,6 @@ void LLVMSetSubprogram(LLVMValueRef Fn, LLVMMetadataRef SP);
#ifdef __cplusplus
}
namespace llvm {
DEFINE_ISA_CONVERSION_FUNCTIONS(Metadata, LLVMMetadataRef)
inline Metadata **unwrap(LLVMMetadataRef *Vals) {
return reinterpret_cast<Metadata**>(Vals);
}
}
#endif
#endif

View File

@ -81,8 +81,9 @@ function(add_llvm_symbol_exports target_name export_file)
# Gold and BFD ld require a version script rather than a plain list.
set(native_export_file "${target_name}.exports")
# FIXME: Don't write the "local:" line on OpenBSD.
# in the export file, also add a linker script to version LLVM symbols (form: LLVM_N.M)
add_custom_command(OUTPUT ${native_export_file}
COMMAND echo "{" > ${native_export_file}
COMMAND echo "LLVM_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} {" > ${native_export_file}
COMMAND grep -q "[[:alnum:]]" ${export_file} && echo " global:" >> ${native_export_file} || :
COMMAND sed -e "s/$/;/" -e "s/^/ /" < ${export_file} >> ${native_export_file}
COMMAND echo " local: *;" >> ${native_export_file}

View File

@ -550,6 +550,8 @@ LLVM IR is defined with the following blocks:
* 17 --- `TYPE_BLOCK`_ --- This describes all of the types in the module.
* 23 --- `STRTAB_BLOCK`_ --- The bitcode file's string table.
.. _MODULE_BLOCK:
MODULE_BLOCK Contents
@ -577,7 +579,7 @@ MODULE_CODE_VERSION Record
``[VERSION, version#]``
The ``VERSION`` record (code 1) contains a single value indicating the format
version. Versions 0 and 1 are supported at this time. The difference between
version. Versions 0, 1 and 2 are supported at this time. The difference between
version 0 and 1 is in the encoding of instruction operands in
each `FUNCTION_BLOCK`_.
@ -620,6 +622,12 @@ as unsigned VBRs. However, forward references are rare, except in the
case of phi instructions. For phi instructions, operands are encoded as
`Signed VBRs`_ to deal with forward references.
In version 2, the meaning of module records ``FUNCTION``, ``GLOBALVAR``,
``ALIAS``, ``IFUNC`` and ``COMDAT`` changes such that the first two operands
specify an offset and size of a string in a string table (see `STRTAB_BLOCK
Contents`_), the function name is removed from the ``FNENTRY`` record in the
value symbol table, and the top-level ``VALUE_SYMTAB_BLOCK`` may only contain
``FNENTRY`` records.
MODULE_CODE_TRIPLE Record
^^^^^^^^^^^^^^^^^^^^^^^^^
@ -673,11 +681,14 @@ for each library name referenced.
MODULE_CODE_GLOBALVAR Record
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
``[GLOBALVAR, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat]``
``[GLOBALVAR, strtab offset, strtab size, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat]``
The ``GLOBALVAR`` record (code 7) marks the declaration or definition of a
global variable. The operand fields are:
* *strtab offset*, *strtab size*: Specifies the name of the global variable.
See `STRTAB_BLOCK Contents`_.
* *pointer type*: The type index of the pointer type used to point to this
global variable
@ -755,11 +766,14 @@ global variable. The operand fields are:
MODULE_CODE_FUNCTION Record
^^^^^^^^^^^^^^^^^^^^^^^^^^^
``[FUNCTION, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, prologuedata, dllstorageclass, comdat, prefixdata, personalityfn]``
``[FUNCTION, strtab offset, strtab size, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, prologuedata, dllstorageclass, comdat, prefixdata, personalityfn]``
The ``FUNCTION`` record (code 8) marks the declaration or definition of a
function. The operand fields are:
* *strtab offset*, *strtab size*: Specifies the name of the function.
See `STRTAB_BLOCK Contents`_.
* *type*: The type index of the function type describing this function
* *callingconv*: The calling convention number:
@ -817,11 +831,14 @@ function. The operand fields are:
MODULE_CODE_ALIAS Record
^^^^^^^^^^^^^^^^^^^^^^^^
``[ALIAS, alias type, aliasee val#, linkage, visibility, dllstorageclass, threadlocal, unnamed_addr]``
``[ALIAS, strtab offset, strtab size, alias type, aliasee val#, linkage, visibility, dllstorageclass, threadlocal, unnamed_addr]``
The ``ALIAS`` record (code 9) marks the definition of an alias. The operand
fields are
* *strtab offset*, *strtab size*: Specifies the name of the alias.
See `STRTAB_BLOCK Contents`_.
* *alias type*: The type index of the alias
* *aliasee val#*: The value index of the aliased value
@ -1300,3 +1317,20 @@ METADATA_ATTACHMENT Contents
----------------------------
The ``METADATA_ATTACHMENT`` block (id 16) ...
.. _STRTAB_BLOCK:
STRTAB_BLOCK Contents
---------------------
The ``STRTAB`` block (id 23) contains a single record (``STRTAB_BLOB``, id 1)
with a single blob operand containing the bitcode file's string table.
Strings in the string table are not null terminated. A record's *strtab
offset* and *strtab size* operands specify the byte offset and size of a
string within the string table.
The string table is used by all preceding blocks in the bitcode file that are
not succeeded by another intervening ``STRTAB`` block. Normally a bitcode
file will have a single string table, but it may have more than one if it
was created by binary concatenation of multiple bitcode files.
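As an illustration, resolving a record's name amounts to slicing the blob; a
minimal C++ sketch (hypothetical helper, not part of the LLVM API):

.. code-block:: c++

  #include <cstdint>
  #include <string>

  // Hypothetical helper: recover a record's name from the STRTAB_BLOB
  // contents. Strings are not null terminated, so both the offset and the
  // size operands are required.
  std::string getRecordName(const std::string &StrtabBlob, uint64_t Offset,
                            uint64_t Size) {
    return StrtabBlob.substr(Offset, Size);
  }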

View File

@ -4380,7 +4380,7 @@ referenced LLVM variable relates to the source language variable.
The current supported vocabulary is limited:
- ``DW_OP_deref`` dereferences the working expression.
- ``DW_OP_deref`` dereferences the top of the expression stack.
- ``DW_OP_plus, 93`` adds ``93`` to the working expression.
- ``DW_OP_LLVM_fragment, 16, 8`` specifies the offset and size (``16`` and ``8``
here, respectively) of the variable fragment from the working expression. Note
@ -4396,12 +4396,17 @@ DIExpression nodes that contain a ``DW_OP_stack_value`` operator are standalone
location descriptions that describe constant values. This form is used to
describe global constants that have been optimized away. All other expressions
are modifiers to another location: A debug intrinsic ties a location and a
DIExpression together. Contrary to DWARF expressions, a DIExpression always
describes the *value* of a source variable and never its *address*. In DWARF
terminology, a DIExpression can always be considered an implicit location
description regardless whether it contains a ``DW_OP_stack_value`` or not.
DIExpression together.
.. code-block:: text
DWARF specifies three kinds of simple location descriptions: Register, memory,
and implicit location descriptions. Register and memory location descriptions
describe the *location* of a source variable (in the sense that a debugger might
modify its value), whereas implicit locations describe merely the *value* of a
source variable. DIExpressions also follow this model: A DIExpression that
doesn't have a trailing ``DW_OP_stack_value`` will describe an *address* when
combined with a concrete location.
.. code-block:: llvm
!0 = !DIExpression(DW_OP_deref)
!1 = !DIExpression(DW_OP_plus, 3)
@ -12285,6 +12290,7 @@ The third argument is a metadata argument specifying the rounding mode to be
assumed. This argument must be one of the following strings:
::
"round.dynamic"
"round.tonearest"
"round.downward"
@ -12316,6 +12322,7 @@ required exception behavior. This argument must be one of the following
strings:
::
"fpexcept.ignore"
"fpexcept.maytrap"
"fpexcept.strict"

View File

@ -180,11 +180,27 @@ provide debug information at various points in generated code.
void @llvm.dbg.declare(metadata, metadata, metadata)
This intrinsic provides information about a local element (e.g., variable).
The first argument is metadata holding the alloca for the variable. The second
This intrinsic provides information about a local element (e.g., variable). The
first argument is metadata holding the alloca for the variable. The second
argument is a `local variable <LangRef.html#dilocalvariable>`_ containing a
description of the variable. The third argument is a `complex expression
<LangRef.html#diexpression>`_.
<LangRef.html#diexpression>`_. An `llvm.dbg.declare` intrinsic describes the
*location* of a source variable.
.. code-block:: llvm
%i.addr = alloca i32, align 4
call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !1, metadata !2), !dbg !3
!1 = !DILocalVariable(name: "i", ...) ; int i
!2 = !DIExpression()
!3 = !DILocation(...)
...
%buffer = alloca [256 x i8], align 8
; The address of i is buffer+64.
call void @llvm.dbg.declare(metadata [256 x i8]* %buffer, metadata !1, metadata !2)
!1 = !DILocalVariable(name: "i", ...) ; int i
!2 = !DIExpression(DW_OP_plus, 64)
``llvm.dbg.value``
^^^^^^^^^^^^^^^^^^

View File

@ -9,15 +9,22 @@ Garbage Collection Safepoints in LLVM
Status
=======
This document describes a set of experimental extensions to LLVM. Use
with caution. Because the intrinsics have experimental status,
compatibility across LLVM releases is not guaranteed.
This document describes a set of extensions to LLVM to support garbage
collection. By now, these mechanisms are well proven: a commercial Java
implementation with a fully relocating collector has shipped using them. There
are a couple of places where bugs might still linger; these are called out
below.
LLVM currently supports an alternate mechanism for conservative
garbage collection support using the ``gcroot`` intrinsic. The mechanism
described here shares little in common with the alternate ``gcroot``
implementation and it is hoped that this mechanism will eventually
replace the gc_root mechanism.
They are still listed as "experimental" to indicate that no forward or backward
compatibility guarantees are offered across versions. If your use case is such
that you need some form of forward compatibility guarantee, please raise the
issue on the llvm-dev mailing list.
LLVM still supports an alternate mechanism for conservative garbage collection
support using the ``gcroot`` intrinsic. The ``gcroot`` mechanism is mostly of
historical interest at this point with one exception - its implementation of
shadow stacks has been used successfully by a number of language frontends and
is still supported.
Overview
========
@ -86,9 +93,36 @@ the collector must be able to:
This document describes the mechanism by which an LLVM based compiler
can provide this information to a language runtime/collector, and
ensure that all pointers can be read and updated if desired. The
heart of the approach is to construct (or rewrite) the IR in a manner
where the possible updates performed by the garbage collector are
ensure that all pointers can be read and updated if desired.
At a high level, LLVM has been extended to support compiling to an abstract
machine which extends the actual target with a non-integral pointer type
suitable for representing a garbage collected reference to an object. In
particular, such non-integral pointer types have no defined mapping to an
integer representation. This semantic quirk allows the runtime to pick an
integer mapping for each point in the program, allowing relocations of objects
without visible effects.
Warning: Non-Integral Pointer Types are a newly added concept in LLVM IR.
It's possible that we've missed disabling some of the optimizations which
assume an integral value for pointers. If you find such a case, please
file a bug or share a patch.
Warning: There is one currently known semantic hole in the definition of
non-integral pointers which has not been addressed upstream. To work around
this, you need to disable speculation of loads unless the memory type
(non-integral pointer vs anything else) is known to be unchanged. That is, it
is not safe to speculate a load if doing so causes a non-integral pointer value
to be loaded as any other type or vice versa. In practice, this restriction is
well isolated to isSafeToSpeculate in ValueTracking.cpp.
This high level abstract machine model is used for most of the LLVM optimizer.
Before starting code generation, we switch representations to an explicit form.
In theory, a frontend could directly generate this low level explicit form, but
doing so is likely to inhibit optimization.
The heart of the explicit approach is to construct (or rewrite) the IR in a
manner where the possible updates performed by the garbage collector are
explicitly visible in the IR. Doing so requires that we:
#. create a new SSA value for each potentially relocated pointer, and
@ -104,7 +138,7 @@ explicitly visible in the IR. Doing so requires that we:
At the most abstract level, inserting a safepoint can be thought of as
replacing a call instruction with a call to a multiple return value
function which both calls the original target of the call, returns
it's result, and returns updated values for any live pointers to
its result, and returns updated values for any live pointers to
garbage collected objects.
Note that the task of identifying all live pointers to garbage
@ -200,7 +234,9 @@ The relevant parts of the StackMap section for our example are:
.short 7
.long 0
This example was taken from the tests for the :ref:`RewriteStatepointsForGC` utility pass. As such, it's full StackMap can be easily examined with the following command.
This example was taken from the tests for the :ref:`RewriteStatepointsForGC`
utility pass. As such, its full StackMap can be easily examined with the
following command.
.. code-block:: bash
@ -536,7 +572,7 @@ Semantics:
""""""""""
The return value of ``gc.relocate`` is the potentially relocated value
of the pointer specified by it's arguments. It is unspecified how the
of the pointer specified by its arguments. It is unspecified how the
value of the returned pointer relates to the argument to the
``gc.statepoint`` other than that a) it points to the same source
language object with the same offset, and b) the 'based-on'
@ -654,11 +690,15 @@ Utility Passes for Safepoint Insertion
RewriteStatepointsForGC
^^^^^^^^^^^^^^^^^^^^^^^^
The pass RewriteStatepointsForGC transforms a functions IR by replacing a
``gc.statepoint`` (with an optional ``gc.result``) with a full relocation
sequence, including all required ``gc.relocates``. To function, the pass
requires that the GC strategy specified for the function be able to reliably
distinguish between GC references and non-GC references in IR it is given.
The pass RewriteStatepointsForGC transforms a function's IR to lower from the
abstract machine model described above to the explicit statepoint model of
relocations. To do this, it replaces all calls or invokes of functions which
might contain a safepoint poll with a ``gc.statepoint`` and associated full
relocation sequence, including all required ``gc.relocates``.
Note that by default, this pass only runs for the "statepoint-example" or
"core-clr" gc strategies. You will need to add your custom strategy to this
whitelist or use one of the predefined ones.
As an example, given this code:
@ -666,7 +706,7 @@ As an example, given this code:
define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
gc "statepoint-example" {
call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
call void @foo()
ret i8 addrspace(1)* %obj
}
@ -683,7 +723,8 @@ The pass would produce this IR:
In the above examples, the addrspace(1) marker on the pointers is the mechanism
that the ``statepoint-example`` GC strategy uses to distinguish references from
non references. Address space 1 is not globally reserved for this purpose.
non-references. The pass assumes that all addrspace(1) pointers are non-integral
pointer types. Address space 1 is not globally reserved for this purpose.
This pass can be used as a utility function by a language frontend that doesn't
want to manually reason about liveness, base pointers, or relocation when
@ -701,23 +742,34 @@ can be relaxed to producing interior derived pointers provided the target
collector can find the associated allocation from an arbitrary interior
derived pointer.
In practice, RewriteStatepointsForGC can be run much later in the pass
By default RewriteStatepointsForGC passes in ``0xABCDEF00`` as the statepoint
ID and ``0`` as the number of patchable bytes to the newly constructed
``gc.statepoint``. These values can be configured on a per-callsite
basis using the attributes ``"statepoint-id"`` and
``"statepoint-num-patch-bytes"``. If a call site is marked with a
``"statepoint-id"`` function attribute and its value is a positive
integer (represented as a string), then that value is used as the ID
of the newly constructed ``gc.statepoint``. If a call site is marked
with a ``"statepoint-num-patch-bytes"`` function attribute and its
value is a positive integer, then that value is used as the 'num patch
bytes' parameter of the newly constructed ``gc.statepoint``. The
``"statepoint-id"`` and ``"statepoint-num-patch-bytes"`` attributes
are not propagated to the ``gc.statepoint`` call or invoke if they
could be successfully parsed.
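The override described above amounts to parsing a positive integer from a
string attribute value; a minimal C++ sketch (hypothetical helper, not the
pass's actual code):

.. code-block:: c++

  #include <cstdint>
  #include <string>

  // Hypothetical helper: interpret a value carried by "statepoint-id" or
  // "statepoint-num-patch-bytes". The value is a positive integer
  // represented as a string; on a parse failure the defaults (0xABCDEF00
  // and 0) would remain in effect.
  bool parseStatepointAttr(const std::string &Value, uint64_t &Result) {
    if (Value.empty())
      return false;
    Result = 0;
    for (char C : Value) {
      if (C < '0' || C > '9')
        return false;                 // not a positive integer
      Result = Result * 10 + unsigned(C - '0');
    }
    return true;
  }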
In practice, RewriteStatepointsForGC should be run much later in the pass
pipeline, after most optimization is already done. This helps to improve
the quality of the generated code when compiled with garbage collection support.
In the long run, this is the intended usage model. At this time, a few details
have yet to be worked out about the semantic model required to guarantee this
is always correct. As such, please use with caution and report bugs.
.. _PlaceSafepoints:
PlaceSafepoints
^^^^^^^^^^^^^^^^
The pass PlaceSafepoints transforms a function's IR by replacing any call or
invoke instructions with appropriate ``gc.statepoint`` and ``gc.result`` pairs,
and inserting safepoint polls sufficient to ensure running code checks for a
safepoint request on a timely manner. This pass is expected to be run before
RewriteStatepointsForGC and thus does not produce full relocation sequences.
The pass PlaceSafepoints inserts safepoint polls sufficient to ensure running
code checks for a safepoint request in a timely manner. This pass is expected
to be run before RewriteStatepointsForGC and thus does not produce full
relocation sequences.
As an example, given input IR of the following:
@ -740,13 +792,16 @@ This pass would produce the following IR:
.. code-block:: text
define void @test() gc "statepoint-example" {
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
%safepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
call void @do_safepoint()
call void @foo()
ret void
}
In this case, we've added an (unconditional) entry safepoint poll and converted the call into a ``gc.statepoint``. Note that despite appearances, the entry poll is not necessarily redundant. We'd have to know that ``foo`` and ``test`` were not mutually recursive for the poll to be redundant. In practice, you'd probably want to your poll definition to contain a conditional branch of some form.
In this case, we've added an (unconditional) entry safepoint poll. Note that
despite appearances, the entry poll is not necessarily redundant. We'd have to
know that ``foo`` and ``test`` were not mutually recursive for the poll to be
redundant. In practice, you'd probably want your poll definition to contain
a conditional branch of some form.
At the moment, PlaceSafepoints can insert safepoint polls at method entry and
loop backedge locations. Extending this to work with return polls would be
@ -763,26 +818,13 @@ of this function is inserted at each poll site desired. While calls or invokes
inside this method are transformed to a ``gc.statepoints``, recursive poll
insertion is not performed.
By default PlaceSafepoints passes in ``0xABCDEF00`` as the statepoint
ID and ``0`` as the number of patchable bytes to the newly constructed
``gc.statepoint``. These values can be configured on a per-callsite
basis using the attributes ``"statepoint-id"`` and
``"statepoint-num-patch-bytes"``. If a call site is marked with a
``"statepoint-id"`` function attribute and its value is a positive
integer (represented as a string), then that value is used as the ID
of the newly constructed ``gc.statepoint``. If a call site is marked
with a ``"statepoint-num-patch-bytes"`` function attribute and its
value is a positive integer, then that value is used as the 'num patch
bytes' parameter of the newly constructed ``gc.statepoint``. The
``"statepoint-id"`` and ``"statepoint-num-patch-bytes"`` attributes
are not propagated to the ``gc.statepoint`` call or invoke if they
could be successfully parsed.
If you are scheduling the RewriteStatepointsForGC pass late in the pass order,
you should probably schedule this pass immediately before it. The exception
would be if you need to preserve abstract frame information (e.g. for
deoptimization or introspection) at safepoints. In that case, ask on the
llvm-dev mailing list for suggestions.
This pass is useful for any language frontend which only has to support
garbage collection semantics at safepoints. If you need other abstract
frame information at safepoints (e.g. for deoptimization or introspection),
you can insert safepoint polls in the frontend. If you have the latter case,
please ask on llvm-dev for suggestions. There's been a good amount of work
done on making such a scheme work well in practice which is not yet documented
here.
Supported Architectures
@ -794,13 +836,6 @@ Today, only X86_64 is supported.
Problem Areas and Active Work
=============================
#. As the existing users of the late rewriting model have matured, we've found
cases where the optimizer breaks the assumption that an SSA value of
gc-pointer type actually contains a gc-pointer and vice-versa. We need to
clarify our expectations and propose at least one small IR change. (Today,
the gc-pointer distinction is managed via address spaces. This turns out
not to be quite strong enough.)
#. Support for languages which allow unmanaged pointers to garbage collected
objects (i.e. pass a pointer to an object to a C routine) via pinning.

View File

@ -2130,6 +2130,16 @@ LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
*/
LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count);
/**
* Obtain a Metadata as a Value.
*/
LLVMValueRef LLVMMetadataAsValue(LLVMContextRef C, LLVMMetadataRef MD);
/**
* Obtain a Value as a Metadata.
*/
LLVMMetadataRef LLVMValueAsMetadata(LLVMValueRef Val);
/**
* Obtain the underlying string from a MDString value.
*

View File

@ -82,6 +82,13 @@ typedef struct LLVMOpaqueValue *LLVMValueRef;
*/
typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef;
/**
* Represents an LLVM Metadata.
*
* This models llvm::Metadata.
*/
typedef struct LLVMOpaqueMetadata *LLVMMetadataRef;
/**
* Represents an LLVM basic block builder.
*
@ -89,6 +96,13 @@ typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef;
*/
typedef struct LLVMOpaqueBuilder *LLVMBuilderRef;
/**
* Represents an LLVM debug info builder.
*
* This models llvm::DIBuilder.
*/
typedef struct LLVMOpaqueDIBuilder *LLVMDIBuilderRef;
/**
* Interface used to provide a module to JIT or interpreter.
* This is now just a synonym for llvm::Module, but we have to keep using the

View File

@ -189,17 +189,17 @@ private:
void initSlowCase(const APInt &that);
/// out-of-line slow case for shl
APInt shlSlowCase(unsigned shiftAmt) const;
void shlSlowCase(unsigned ShiftAmt);
/// out-of-line slow case for lshr.
void lshrSlowCase(unsigned ShiftAmt);
/// out-of-line slow case for operator=
APInt &AssignSlowCase(const APInt &RHS);
void AssignSlowCase(const APInt &RHS);
/// out-of-line slow case for operator==
bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY;
/// out-of-line slow case for operator==
bool EqualSlowCase(uint64_t Val) const LLVM_READONLY;
/// out-of-line slow case for countLeadingZeros
unsigned countLeadingZerosSlowCase() const LLVM_READONLY;
@ -209,6 +209,12 @@ private:
/// out-of-line slow case for countPopulation
unsigned countPopulationSlowCase() const LLVM_READONLY;
/// out-of-line slow case for intersects.
bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY;
/// out-of-line slow case for isSubsetOf.
bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY;
/// out-of-line slow case for setBits.
void setBitsSlowCase(unsigned loBit, unsigned hiBit);
@ -216,13 +222,13 @@ private:
void flipAllBitsSlowCase();
/// out-of-line slow case for operator&=.
APInt& AndAssignSlowCase(const APInt& RHS);
void AndAssignSlowCase(const APInt& RHS);
/// out-of-line slow case for operator|=.
APInt& OrAssignSlowCase(const APInt& RHS);
void OrAssignSlowCase(const APInt& RHS);
/// out-of-line slow case for operator^=.
APInt& XorAssignSlowCase(const APInt& RHS);
void XorAssignSlowCase(const APInt& RHS);
public:
/// \name Constructors
@ -330,6 +336,20 @@ public:
/// This tests the high bit of the APInt to determine if it is unset.
bool isNonNegative() const { return !isNegative(); }
/// \brief Determine if sign bit of this APInt is set.
///
/// This tests the high bit of this APInt to determine if it is set.
///
/// \returns true if this APInt has its sign bit set, false otherwise.
bool isSignBitSet() const { return (*this)[BitWidth-1]; }
/// \brief Determine if sign bit of this APInt is clear.
///
/// This tests the high bit of this APInt to determine if it is clear.
///
/// \returns true if this APInt has its sign bit clear, false otherwise.
bool isSignBitClear() const { return !isSignBitSet(); }
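// Illustrative usage (editor's sketch, not part of this commit): for an
// 8-bit value 0x80 the sign bit is the most significant bit, so
//   APInt X(8, 0x80);
//   X.isSignBitSet();   // true: bit 7 is set
//   X.isSignBitClear(); // false
//   X.isNegative();     // true: same bit, signed interpretation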
/// \brief Determine if this APInt Value is positive.
///
/// This tests if the value of this APInt is positive (> 0). Note
@ -396,10 +416,10 @@ public:
return countPopulationSlowCase() == 1;
}
/// \brief Check if the APInt's value is returned by getSignBit.
/// \brief Check if the APInt's value is returned by getSignMask.
///
/// \returns true if this is the value returned by getSignBit.
bool isSignBit() const { return isMinSignedValue(); }
/// \returns true if this is the value returned by getSignMask.
bool isSignMask() const { return isMinSignedValue(); }
/// \brief Convert APInt to a boolean value.
///
@ -409,8 +429,7 @@ public:
/// If this value is smaller than the specified limit, return it, otherwise
/// return the limit value. This causes the value to saturate to the limit.
uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) const {
return (getActiveBits() > 64 || getZExtValue() > Limit) ? Limit
: getZExtValue();
return ugt(Limit) ? Limit : getZExtValue();
}
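// Illustrative usage (editor's sketch, not part of this commit): values
// above the limit saturate, so APInt(128, 300).getLimitedValue(255)
// yields 255, while APInt(128, 7).getLimitedValue(255) yields 7.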
/// \brief Check if the APInt consists of a repeated bit pattern.
@ -427,8 +446,9 @@ public:
assert(numBits <= BitWidth && "numBits out of range");
if (isSingleWord())
return VAL == (UINT64_MAX >> (APINT_BITS_PER_WORD - numBits));
unsigned Ones = countTrailingOnes();
return (numBits == Ones) && ((Ones + countLeadingZeros()) == BitWidth);
unsigned Ones = countTrailingOnesSlowCase();
return (numBits == Ones) &&
((Ones + countLeadingZerosSlowCase()) == BitWidth);
}
/// \returns true if this APInt is a non-empty sequence of ones starting at
@ -437,8 +457,8 @@ public:
bool isMask() const {
if (isSingleWord())
return isMask_64(VAL);
unsigned Ones = countTrailingOnes();
return (Ones > 0) && ((Ones + countLeadingZeros()) == BitWidth);
unsigned Ones = countTrailingOnesSlowCase();
return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
}
/// \brief Return true if this APInt value contains a sequence of ones with
@ -446,8 +466,9 @@ public:
bool isShiftedMask() const {
if (isSingleWord())
return isShiftedMask_64(VAL);
unsigned Ones = countPopulation();
return (Ones + countTrailingZeros() + countLeadingZeros()) == BitWidth;
unsigned Ones = countPopulationSlowCase();
unsigned LeadZ = countLeadingZerosSlowCase();
return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
}
/// @}
@ -476,11 +497,11 @@ public:
return API;
}
/// \brief Get the SignBit for a specific bit width.
/// \brief Get the SignMask for a specific bit width.
///
/// This is just a wrapper function of getSignedMinValue(), and it helps code
/// readability when we want to get a SignBit.
static APInt getSignBit(unsigned BitWidth) {
/// readability when we want to get a SignMask.
static APInt getSignMask(unsigned BitWidth) {
return getSignedMinValue(BitWidth);
}
@ -674,29 +695,22 @@ public:
return clearUnusedBits();
}
return AssignSlowCase(RHS);
AssignSlowCase(RHS);
return *this;
}
/// @brief Move assignment operator.
APInt &operator=(APInt &&that) {
if (!isSingleWord()) {
// The MSVC STL shipped in 2013 requires that self move assignment be a
// no-op. Otherwise algorithms like stable_sort will produce answers
// where half of the output is left in a moved-from state.
if (this == &that)
return *this;
assert(this != &that && "Self-move not supported");
if (!isSingleWord())
delete[] pVal;
}
// Use memcpy so that type based alias analysis sees both VAL and pVal
// as modified.
memcpy(&VAL, &that.VAL, sizeof(uint64_t));
// If 'this == &that', avoid zeroing our own bitwidth by storing to 'that'
// first.
unsigned ThatBitWidth = that.BitWidth;
BitWidth = that.BitWidth;
that.BitWidth = 0;
BitWidth = ThatBitWidth;
return *this;
}
@ -727,11 +741,11 @@ public:
/// \returns *this after ANDing with RHS.
APInt &operator&=(const APInt &RHS) {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord()) {
if (isSingleWord())
VAL &= RHS.VAL;
return *this;
}
return AndAssignSlowCase(RHS);
else
AndAssignSlowCase(RHS);
return *this;
}
/// \brief Bitwise AND assignment operator.
@ -757,11 +771,11 @@ public:
/// \returns *this after ORing with RHS.
APInt &operator|=(const APInt &RHS) {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord()) {
if (isSingleWord())
VAL |= RHS.VAL;
return *this;
}
return OrAssignSlowCase(RHS);
else
OrAssignSlowCase(RHS);
return *this;
}
/// \brief Bitwise OR assignment operator.
@ -787,11 +801,11 @@ public:
/// \returns *this after XORing with RHS.
APInt &operator^=(const APInt &RHS) {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord()) {
if (isSingleWord())
VAL ^= RHS.VAL;
return *this;
}
return XorAssignSlowCase(RHS);
else
XorAssignSlowCase(RHS);
return *this;
}
/// \brief Bitwise XOR assignment operator.
@ -836,9 +850,17 @@ public:
///
/// Shifts *this left by shiftAmt and assigns the result to *this.
///
/// \returns *this after shifting left by shiftAmt
APInt &operator<<=(unsigned shiftAmt) {
*this = shl(shiftAmt);
/// \returns *this after shifting left by ShiftAmt
APInt &operator<<=(unsigned ShiftAmt) {
assert(ShiftAmt <= BitWidth && "Invalid shift amount");
if (isSingleWord()) {
if (ShiftAmt == BitWidth)
VAL = 0;
else
VAL <<= ShiftAmt;
return clearUnusedBits();
}
shlSlowCase(ShiftAmt);
return *this;
}
@ -875,20 +897,26 @@ public:
return R;
}
/// Logical right-shift this APInt by shiftAmt in place.
void lshrInPlace(unsigned shiftAmt);
/// Logical right-shift this APInt by ShiftAmt in place.
void lshrInPlace(unsigned ShiftAmt) {
assert(ShiftAmt <= BitWidth && "Invalid shift amount");
if (isSingleWord()) {
if (ShiftAmt == BitWidth)
VAL = 0;
else
VAL >>= ShiftAmt;
return;
}
lshrSlowCase(ShiftAmt);
}
/// \brief Left-shift function.
///
/// Left-shift this APInt by shiftAmt.
APInt shl(unsigned shiftAmt) const {
assert(shiftAmt <= BitWidth && "Invalid shift amount");
if (isSingleWord()) {
if (shiftAmt >= BitWidth)
return APInt(BitWidth, 0); // avoid undefined shift results
return APInt(BitWidth, VAL << shiftAmt);
}
return shlSlowCase(shiftAmt);
APInt R(*this);
R <<= shiftAmt;
return R;
}
/// \brief Rotate left by rotateAmt.
@ -905,7 +933,14 @@ public:
/// \brief Logical right-shift function.
///
/// Logical right-shift this APInt by shiftAmt.
APInt lshr(const APInt &shiftAmt) const;
APInt lshr(const APInt &ShiftAmt) const {
APInt R(*this);
R.lshrInPlace(ShiftAmt);
return R;
}
/// Logical right-shift this APInt by ShiftAmt in place.
void lshrInPlace(const APInt &ShiftAmt);
/// \brief Left-shift function.
///
@ -1003,9 +1038,7 @@ public:
///
/// \returns true if *this == Val
bool operator==(uint64_t Val) const {
if (isSingleWord())
return VAL == Val;
return EqualSlowCase(Val);
return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val;
}
/// \brief Equality comparison.
@ -1055,7 +1088,8 @@ public:
///
/// \returns true if *this < RHS when considered unsigned.
bool ult(uint64_t RHS) const {
return getActiveBits() > 64 ? false : getZExtValue() < RHS;
// Only need to check active bits if not a single word.
return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS;
}
/// \brief Signed less than comparison
@ -1073,7 +1107,8 @@ public:
///
/// \returns true if *this < RHS when considered signed.
bool slt(int64_t RHS) const {
return getMinSignedBits() > 64 ? isNegative() : getSExtValue() < RHS;
return (!isSingleWord() && getMinSignedBits() > 64) ? isNegative()
: getSExtValue() < RHS;
}
/// \brief Unsigned less or equal comparison
@ -1123,7 +1158,8 @@ public:
///
/// \returns true if *this > RHS when considered unsigned.
bool ugt(uint64_t RHS) const {
return getActiveBits() > 64 ? true : getZExtValue() > RHS;
// Only need to check active bits if not a single word.
return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
}
/// \brief Signed greather than comparison
@ -1141,7 +1177,8 @@ public:
///
/// \returns true if *this > RHS when considered signed.
bool sgt(int64_t RHS) const {
return getMinSignedBits() > 64 ? !isNegative() : getSExtValue() > RHS;
return (!isSingleWord() && getMinSignedBits() > 64) ? !isNegative()
: getSExtValue() > RHS;
}
/// \brief Unsigned greater or equal comparison
@ -1179,9 +1216,18 @@ public:
/// This operation tests if there are any pairs of corresponding bits
/// between this APInt and RHS that are both set.
bool intersects(const APInt &RHS) const {
APInt temp(*this);
temp &= RHS;
return temp != 0;
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord())
return (VAL & RHS.VAL) != 0;
return intersectsSlowCase(RHS);
}
/// This operation checks that all bits set in this APInt are also set in RHS.
bool isSubsetOf(const APInt &RHS) const {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord())
return (VAL & ~RHS.VAL) == 0;
return isSubsetOfSlowCase(RHS);
}
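// Illustrative usage (editor's sketch, not part of this commit):
//   APInt A(8, 0x06), B(8, 0x02);
//   B.isSubsetOf(A); // true: every bit set in B is also set in A
//   A.intersects(B); // true: A & B is non-zero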
/// @}
@ -1404,8 +1450,7 @@ public:
/// int64_t. Otherwise an assertion will result.
int64_t getSExtValue() const {
if (isSingleWord())
return int64_t(VAL << (APINT_BITS_PER_WORD - BitWidth)) >>
(APINT_BITS_PER_WORD - BitWidth);
return SignExtend64(VAL, BitWidth);
assert(getMinSignedBits() <= 64 && "Too many bits for int64_t");
return int64_t(pVal[0]);
}
@ -1759,13 +1804,13 @@ public:
WordType *remainder, WordType *scratch,
unsigned parts);
/// Shift a bignum left COUNT bits. Shifted in bits are zero. There are no
/// restrictions on COUNT.
static void tcShiftLeft(WordType *, unsigned parts, unsigned count);
/// Shift a bignum left Count bits. Shifted in bits are zero. There are no
/// restrictions on Count.
static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);
/// Shift a bignum right COUNT bits. Shifted in bits are zero. There are no
/// restrictions on COUNT.
static void tcShiftRight(WordType *, unsigned parts, unsigned count);
/// Shift a bignum right Count bits. Shifted in bits are zero. There are no
/// restrictions on Count.
static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
/// The obvious AND, OR and XOR and complement operations.
static void tcAnd(WordType *, const WordType *, unsigned);
@ -1959,7 +2004,7 @@ inline const APInt &umax(const APInt &A, const APInt &B) {
/// \brief Compute GCD of two unsigned APInt values.
///
/// This function returns the greatest common divisor of the two APInt values
/// using Euclid's algorithm.
/// using Stein's algorithm.
///
/// \returns the greatest common divisor of A and B.
APInt GreatestCommonDivisor(APInt A, APInt B);
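// Editor's sketch (not part of this commit): Stein's algorithm, also known
// as binary GCD, replaces the divisions of Euclid's algorithm with shifts
// and subtractions. A plain uint64_t version for illustration:
//
//   uint64_t steinGCD(uint64_t A, uint64_t B) {
//     if (A == 0) return B;
//     if (B == 0) return A;
//     unsigned Shift = 0;
//     while (((A | B) & 1) == 0) { A >>= 1; B >>= 1; ++Shift; } // shared 2s
//     while ((A & 1) == 0) A >>= 1;   // strip remaining 2s; A is now odd
//     do {
//       while ((B & 1) == 0) B >>= 1; // B is now odd
//       if (A > B) std::swap(A, B);   // keep A <= B
//       B -= A;                       // result is even, gcd unchanged
//     } while (B != 0);
//     return A << Shift;              // restore the shared factors of two
//   }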

View File

@ -14,6 +14,8 @@
#ifndef LLVM_ADT_BITVECTOR_H
#define LLVM_ADT_BITVECTOR_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
@ -455,6 +457,105 @@ public:
return *this;
}
BitVector &operator>>=(unsigned N) {
assert(N <= Size);
if (LLVM_UNLIKELY(empty() || N == 0))
return *this;
unsigned NumWords = NumBitWords(Size);
assert(NumWords >= 1);
wordShr(N / BITWORD_SIZE);
unsigned BitDistance = N % BITWORD_SIZE;
if (BitDistance == 0)
return *this;
// When the shift size is not a multiple of the word size, then we have
// a tricky situation where each word in succession needs to extract some
// of the bits from the next word and or them into this word while
// shifting this word to make room for the new bits. This has to be done
// for every word in the array.
// Since we're shifting each word right, some bits will fall off the end
// of each word to the right, and empty space will be created on the left.
// The final word in the array will lose bits permanently, so starting at
// the beginning, work forwards shifting each word to the right, and
// OR'ing in the bits from the end of the next word to the beginning of
// the current word.
// Example:
// Starting with {0xAABBCCDD, 0xEEFF0011, 0x22334455} and shifting right
// by 4 bits.
// Step 1: Word[0] >>= 4 ; 0x0ABBCCDD
// Step 2: Word[0] |= 0x10000000 ; 0x1ABBCCDD
// Step 3: Word[1] >>= 4 ; 0x0EEFF001
// Step 4: Word[1] |= 0x50000000 ; 0x5EEFF001
// Step 5: Word[2] >>= 4 ; 0x02334455
// Result: { 0x1ABBCCDD, 0x5EEFF001, 0x02334455 }
const BitWord Mask = maskTrailingOnes<BitWord>(BitDistance);
const unsigned LSH = BITWORD_SIZE - BitDistance;
for (unsigned I = 0; I < NumWords - 1; ++I) {
Bits[I] >>= BitDistance;
Bits[I] |= (Bits[I + 1] & Mask) << LSH;
}
Bits[NumWords - 1] >>= BitDistance;
return *this;
}
BitVector &operator<<=(unsigned N) {
assert(N <= Size);
if (LLVM_UNLIKELY(empty() || N == 0))
return *this;
unsigned NumWords = NumBitWords(Size);
assert(NumWords >= 1);
wordShl(N / BITWORD_SIZE);
unsigned BitDistance = N % BITWORD_SIZE;
if (BitDistance == 0)
return *this;
// When the shift size is not a multiple of the word size, then we have
// a tricky situation where each word in succession needs to extract some
// of the bits from the previous word and or them into this word while
// shifting this word to make room for the new bits. This has to be done
// for every word in the array. This is similar to the algorithm outlined
// in operator>>=, but backwards.
// Since we're shifting each word left, some bits will fall off the end
// of each word to the left, and empty space will be created on the right.
// The first word in the array will lose bits permanently, so starting at
// the end, work backwards shifting each word to the left, and OR'ing
// in the bits from the end of the next word to the beginning of the
// current word.
// Example:
// Starting with {0xAABBCCDD, 0xEEFF0011, 0x22334455} and shifting left
// by 4 bits.
// Step 1: Word[2] <<= 4 ; 0x23344550
// Step 2: Word[2] |= 0x0000000E ; 0x2334455E
// Step 3: Word[1] <<= 4 ; 0xEFF00110
// Step 4: Word[1] |= 0x0000000A ; 0xEFF0011A
// Step 5: Word[0] <<= 4 ; 0xABBCCDD0
// Result: { 0xABBCCDD0, 0xEFF0011A, 0x2334455E }
const BitWord Mask = maskLeadingOnes<BitWord>(BitDistance);
const unsigned RSH = BITWORD_SIZE - BitDistance;
for (int I = NumWords - 1; I > 0; --I) {
Bits[I] <<= BitDistance;
Bits[I] |= (Bits[I - 1] & Mask) >> RSH;
}
Bits[0] <<= BitDistance;
clear_unused_bits();
return *this;
}
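// Illustrative usage (editor's sketch, not part of this commit):
//   BitVector BV(16);
//   BV.set(0);  // only bit 0 set
//   BV <<= 4;   // bit 0 moves to bit 4
//   BV >>= 2;   // bit 4 moves to bit 2
//   assert(BV.test(2) && BV.count() == 1);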
// Assignment operator.
const BitVector &operator=(const BitVector &RHS) {
if (this == &RHS) return *this;
@ -538,6 +639,54 @@ public:
}
private:
/// \brief Perform a logical left shift of \p Count words by moving everything
/// \p Count words to the right in memory.
///
/// While confusing, words are stored from least significant at Bits[0] to
/// most significant at Bits[NumWords-1]. A logical shift left, however,
/// moves the current least significant bit to a higher logical index, and
/// fills the previous least significant bits with 0. Thus, we actually
/// need to move the bytes of the memory to the right, not to the left.
/// Example:
/// Words = [0xBBBBAAAA, 0xDDDDFFFF, 0x00000000, 0xDDDD0000]
/// represents a BitVector where 0xBBBBAAAA contain the least significant
/// bits. So if we want to shift the BitVector left by 2 words, we need to
/// turn this into 0x00000000 0x00000000 0xBBBBAAAA 0xDDDDFFFF by using a
/// memmove which moves right, not left.
void wordShl(uint32_t Count) {
if (Count == 0)
return;
uint32_t NumWords = NumBitWords(Size);
auto Src = ArrayRef<BitWord>(Bits, NumWords).drop_back(Count);
auto Dest = MutableArrayRef<BitWord>(Bits, NumWords).drop_front(Count);
// Since we always move Word-sized chunks of data with src and dest both
// aligned to a word-boundary, we don't need to worry about endianness
// here.
std::memmove(Dest.begin(), Src.begin(), Dest.size() * sizeof(BitWord));
std::memset(Bits, 0, Count * sizeof(BitWord));
clear_unused_bits();
}
/// \brief Perform a logical right shift of \p Count words by moving those
/// words to the left in memory. See wordShl for more information.
///
void wordShr(uint32_t Count) {
if (Count == 0)
return;
uint32_t NumWords = NumBitWords(Size);
auto Src = ArrayRef<BitWord>(Bits, NumWords).drop_front(Count);
auto Dest = MutableArrayRef<BitWord>(Bits, NumWords).drop_back(Count);
assert(Dest.size() == Src.size());
std::memmove(Dest.begin(), Src.begin(), Dest.size() * sizeof(BitWord));
std::memset(Dest.end(), 0, Count * sizeof(BitWord));
}
int next_unset_in_word(int WordIndex, BitWord Word) const {
unsigned Result = WordIndex * BITWORD_SIZE + countTrailingOnes(Word);
return Result < size() ? Result : -1;

View File

@ -508,6 +508,22 @@ public:
return *this;
}
SmallBitVector &operator<<=(unsigned N) {
if (isSmall())
setSmallBits(getSmallBits() << N);
else
getPointer()->operator<<=(N);
return *this;
}
SmallBitVector &operator>>=(unsigned N) {
if (isSmall())
setSmallBits(getSmallBits() >> N);
else
getPointer()->operator>>=(N);
return *this;
}
// Assignment operator.
const SmallBitVector &operator=(const SmallBitVector &RHS) {
if (isSmall()) {

View File

@ -1164,9 +1164,8 @@ template <class BT> struct BlockEdgesAdder {
void operator()(IrreducibleGraph &G, IrreducibleGraph::IrrNode &Irr,
const LoopData *OuterLoop) {
const BlockT *BB = BFI.RPOT[Irr.Node.Index];
for (auto I = Successor::child_begin(BB), E = Successor::child_end(BB);
I != E; ++I)
G.addEdge(Irr, BFI.getNode(*I), OuterLoop);
for (const auto Succ : children<const BlockT *>(BB))
G.addEdge(Irr, BFI.getNode(Succ), OuterLoop);
}
};
}
@ -1210,10 +1209,9 @@ BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop,
return false;
} else {
const BlockT *BB = getBlock(Node);
for (auto SI = Successor::child_begin(BB), SE = Successor::child_end(BB);
SI != SE; ++SI)
if (!addToDist(Dist, OuterLoop, Node, getNode(*SI),
getWeightFromBranchProb(BPI->getEdgeProbability(BB, SI))))
for (const auto Succ : children<const BlockT *>(BB))
if (!addToDist(Dist, OuterLoop, Node, getNode(Succ),
getWeightFromBranchProb(BPI->getEdgeProbability(BB, Succ))))
// Irreducible backedge.
return false;
}

View File

@ -174,12 +174,10 @@ ForwardDominanceFrontierBase<BlockT>::calculate(const DomTreeT &DT,
// Visit each block only once.
if (visited.insert(currentBB).second) {
// Loop over CFG successors to calculate DFlocal[currentNode]
for (auto SI = BlockTraits::child_begin(currentBB),
SE = BlockTraits::child_end(currentBB);
SI != SE; ++SI) {
for (const auto Succ : children<BlockT *>(currentBB)) {
// Does Node immediately dominate this successor?
if (DT[*SI]->getIDom() != currentNode)
S.insert(*SI);
if (DT[Succ]->getIDom() != currentNode)
S.insert(Succ);
}
}

View File

@ -158,11 +158,8 @@ public:
/// True if terminator in the block can branch to another block that is
/// outside of the current loop.
bool isLoopExiting(const BlockT *BB) const {
typedef GraphTraits<const BlockT*> BlockTraits;
for (typename BlockTraits::ChildIteratorType SI =
BlockTraits::child_begin(BB),
SE = BlockTraits::child_end(BB); SI != SE; ++SI) {
if (!contains(*SI))
for (const auto Succ : children<const BlockT*>(BB)) {
if (!contains(Succ))
return true;
}
return false;
@ -186,11 +183,8 @@ public:
unsigned NumBackEdges = 0;
BlockT *H = getHeader();
typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
for (typename InvBlockTraits::ChildIteratorType I =
InvBlockTraits::child_begin(H),
E = InvBlockTraits::child_end(H); I != E; ++I)
if (contains(*I))
for (const auto Pred : children<Inverse<BlockT*> >(H))
if (contains(Pred))
++NumBackEdges;
return NumBackEdges;
@ -249,12 +243,9 @@ public:
/// contains a branch back to the header.
void getLoopLatches(SmallVectorImpl<BlockT *> &LoopLatches) const {
BlockT *H = getHeader();
typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
for (typename InvBlockTraits::ChildIteratorType I =
InvBlockTraits::child_begin(H),
E = InvBlockTraits::child_end(H); I != E; ++I)
if (contains(*I))
LoopLatches.push_back(*I);
for (const auto Pred : children<Inverse<BlockT*>>(H))
if (contains(Pred))
LoopLatches.push_back(Pred);
}
//===--------------------------------------------------------------------===//

View File

@ -34,14 +34,11 @@ namespace llvm {
template<class BlockT, class LoopT>
void LoopBase<BlockT, LoopT>::
getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const {
typedef GraphTraits<BlockT*> BlockTraits;
for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI)
for (typename BlockTraits::ChildIteratorType I =
BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI);
I != E; ++I)
if (!contains(*I)) {
for (const auto BB : blocks())
for (const auto Succ : children<BlockT*>(BB))
if (!contains(Succ)) {
// Not in current loop? It must be an exit block.
ExitingBlocks.push_back(*BI);
ExitingBlocks.push_back(BB);
break;
}
}
@ -63,14 +60,11 @@ BlockT *LoopBase<BlockT, LoopT>::getExitingBlock() const {
template<class BlockT, class LoopT>
void LoopBase<BlockT, LoopT>::
getExitBlocks(SmallVectorImpl<BlockT*> &ExitBlocks) const {
typedef GraphTraits<BlockT*> BlockTraits;
for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI)
for (typename BlockTraits::ChildIteratorType I =
BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI);
I != E; ++I)
if (!contains(*I))
for (const auto BB : blocks())
for (const auto Succ : children<BlockT*>(BB))
if (!contains(Succ))
// Not in current loop? It must be an exit block.
ExitBlocks.push_back(*I);
ExitBlocks.push_back(Succ);
}
/// getExitBlock - If getExitBlocks would return exactly one block,
@ -88,14 +82,11 @@ BlockT *LoopBase<BlockT, LoopT>::getExitBlock() const {
template<class BlockT, class LoopT>
void LoopBase<BlockT, LoopT>::
getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const {
typedef GraphTraits<BlockT*> BlockTraits;
for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI)
for (typename BlockTraits::ChildIteratorType I =
BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI);
I != E; ++I)
if (!contains(*I))
for (const auto BB : blocks())
for (const auto Succ : children<BlockT*>(BB))
if (!contains(Succ))
// Not in current loop? It must be an exit block.
ExitEdges.push_back(Edge(*BI, *I));
ExitEdges.emplace_back(BB, Succ);
}
/// getLoopPreheader - If there is a preheader for this loop, return it. A
@ -134,15 +125,11 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const {
// Loop over the predecessors of the header node...
BlockT *Header = getHeader();
typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
for (typename InvBlockTraits::ChildIteratorType PI =
InvBlockTraits::child_begin(Header),
PE = InvBlockTraits::child_end(Header); PI != PE; ++PI) {
typename InvBlockTraits::NodeRef N = *PI;
if (!contains(N)) { // If the block is not in the loop...
if (Out && Out != N)
for (const auto Pred : children<Inverse<BlockT*>>(Header)) {
if (!contains(Pred)) { // If the block is not in the loop...
if (Out && Out != Pred)
return nullptr; // Multiple predecessors outside the loop
Out = N;
Out = Pred;
}
}
@ -156,17 +143,11 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const {
template<class BlockT, class LoopT>
BlockT *LoopBase<BlockT, LoopT>::getLoopLatch() const {
BlockT *Header = getHeader();
typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
typename InvBlockTraits::ChildIteratorType PI =
InvBlockTraits::child_begin(Header);
typename InvBlockTraits::ChildIteratorType PE =
InvBlockTraits::child_end(Header);
BlockT *Latch = nullptr;
for (; PI != PE; ++PI) {
typename InvBlockTraits::NodeRef N = *PI;
if (contains(N)) {
for (const auto Pred : children<Inverse<BlockT*>>(Header)) {
if (contains(Pred)) {
if (Latch) return nullptr;
Latch = N;
Latch = Pred;
}
}
@ -394,11 +375,9 @@ static void discoverAndMapSubloop(LoopT *L, ArrayRef<BlockT*> Backedges,
// within this subloop tree itself. Note that a predecessor may directly
// reach another subloop that is not yet discovered to be a subloop of
// this loop, which we must traverse.
for (typename InvBlockTraits::ChildIteratorType PI =
InvBlockTraits::child_begin(PredBB),
PE = InvBlockTraits::child_end(PredBB); PI != PE; ++PI) {
if (LI->getLoopFor(*PI) != Subloop)
ReverseCFGWorklist.push_back(*PI);
for (const auto Pred : children<Inverse<BlockT*>>(PredBB)) {
if (LI->getLoopFor(Pred) != Subloop)
ReverseCFGWorklist.push_back(Pred);
}
}
}
@ -482,13 +461,7 @@ analyze(const DominatorTreeBase<BlockT> &DomTree) {
SmallVector<BlockT *, 4> Backedges;
// Check each predecessor of the potential loop header.
typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
for (typename InvBlockTraits::ChildIteratorType PI =
InvBlockTraits::child_begin(Header),
PE = InvBlockTraits::child_end(Header); PI != PE; ++PI) {
BlockT *Backedge = *PI;
for (const auto Backedge : children<Inverse<BlockT*>>(Header)) {
// If Header dominates predBB, this is a new loop. Collect the backedges.
if (DomTree.dominates(Header, Backedge)
&& DomTree.isReachableFromEntry(Backedge)) {

View File

@ -53,6 +53,11 @@ bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
/// \brief Tests if a value is a call or invoke to a library function that
/// allocates memory similar to malloc or calloc.
bool isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
/// \brief Tests if a value is a call or invoke to a library function that
/// allocates memory (either malloc, calloc, or strdup like).
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,

View File

@ -1159,8 +1159,20 @@ public:
const SCEV *getConstant(const APInt &Val);
const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty);
typedef SmallDenseMap<std::pair<const SCEV *, Type *>, const SCEV *, 8>
ExtendCacheTy;
const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty);
const SCEV *getZeroExtendExprCached(const SCEV *Op, Type *Ty,
ExtendCacheTy &Cache);
const SCEV *getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
ExtendCacheTy &Cache);
const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty);
const SCEV *getSignExtendExprCached(const SCEV *Op, Type *Ty,
ExtendCacheTy &Cache);
const SCEV *getSignExtendExprImpl(const SCEV *Op, Type *Ty,
ExtendCacheTy &Cache);
const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty);
const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,

View File

@ -46,6 +46,9 @@ namespace llvm {
ArrayRef<uint8_t> Buffer;
StringRef ModuleIdentifier;
// The string table used to interpret this module.
StringRef Strtab;
// The bitstream location of the IDENTIFICATION_BLOCK.
uint64_t IdentificationBit;
@ -70,6 +73,7 @@ namespace llvm {
StringRef getBuffer() const {
return StringRef((const char *)Buffer.begin(), Buffer.size());
}
StringRef getStrtab() const { return Strtab; }
StringRef getModuleIdentifier() const { return ModuleIdentifier; }

View File

@ -15,6 +15,7 @@
#define LLVM_BITCODE_BITCODEWRITER_H
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/MC/StringTableBuilder.h"
#include <string>
namespace llvm {
@ -26,12 +27,25 @@ namespace llvm {
SmallVectorImpl<char> &Buffer;
std::unique_ptr<BitstreamWriter> Stream;
StringTableBuilder StrtabBuilder{StringTableBuilder::RAW};
bool WroteStrtab = false;
void writeBlob(unsigned Block, unsigned Record, StringRef Blob);
public:
/// Create a BitcodeWriter that writes to Buffer.
BitcodeWriter(SmallVectorImpl<char> &Buffer);
~BitcodeWriter();
/// Write the bitcode file's string table. This must be called exactly once
/// after all modules have been written.
void writeStrtab();
/// Copy the string table for another module into this bitcode file. This
/// should be called after copying the module itself into the bitcode file.
void copyStrtab(StringRef Strtab);
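// Illustrative usage (editor's sketch, not part of this commit): write all
// modules first, then emit the shared string table exactly once:
//   SmallVector<char, 0> Buffer;
//   BitcodeWriter Writer(Buffer);
//   Writer.writeModule(...); // one call per module
//   Writer.writeStrtab();    // exactly once, after all modules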
/// Write the specified module to the buffer specified at construction time.
///
/// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a

View File

@ -22,7 +22,7 @@
namespace llvm {
namespace bitc {
// The only top-level block type defined is for a module.
// The only top-level block types are MODULE, IDENTIFICATION and STRTAB.
enum BlockIDs {
// Blocks
MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID,
@ -52,7 +52,9 @@ enum BlockIDs {
OPERAND_BUNDLE_TAGS_BLOCK_ID,
METADATA_KIND_BLOCK_ID
METADATA_KIND_BLOCK_ID,
STRTAB_BLOCK_ID,
};
/// Identification block contains a string that describes the producer details,
@ -232,6 +234,10 @@ enum GlobalValueSummarySymtabCodes {
// llvm.type.checked.load intrinsic with all constant integer arguments.
// [typeid, offset, n x arg]
FS_TYPE_CHECKED_LOAD_CONST_VCALL = 15,
// Assigns a GUID to a value ID. This normally appears only in combined
// summaries, but it can also appear in per-module summaries for PGO data.
// [valueid, guid]
FS_VALUE_GUID = 16,
};
enum MetadataCodes {
@ -550,6 +556,10 @@ enum ComdatSelectionKindCodes {
COMDAT_SELECTION_KIND_SAME_SIZE = 5,
};
enum StrtabCodes {
STRTAB_BLOB = 1,
};
} // End bitc namespace
} // End llvm namespace

View File

@ -62,9 +62,6 @@ protected:
const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) const;
Optional<int64_t> getConstantVRegVal(unsigned VReg,
const MachineRegisterInfo &MRI) const;
bool isOperandImmEqual(const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const;

View File

@ -60,5 +60,8 @@ void reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
const char *PassName, StringRef Msg,
const MachineInstr &MI);
Optional<int64_t> getConstantVRegVal(unsigned VReg,
const MachineRegisterInfo &MRI);
} // End namespace llvm.
#endif

View File

@ -413,6 +413,11 @@ MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
unsigned Reg, unsigned Offset,
const MDNode *Variable, const MDNode *Expr);
/// Clone a DBG_VALUE whose value has been spilled to FrameIndex.
MachineInstr *buildDbgValueForSpill(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
const MachineInstr &Orig, int FrameIndex);
inline unsigned getDefRegState(bool B) {
return B ? RegState::Define : 0;
}

View File

@ -28,155 +28,246 @@ namespace llvm {
/// type can be represented by an MVT.
class MVT {
public:
enum SimpleValueType : int8_t {
// Simple value types less than zero are considered extended value types.
INVALID_SIMPLE_VALUE_TYPE = -1,
enum SimpleValueType : uint8_t {
// Simple value types that aren't explicitly part of this enumeration
// are considered extended value types.
INVALID_SIMPLE_VALUE_TYPE = 0,
// If you change this numbering, you must change the values in
// ValueTypes.td as well!
Other = 0, // This is a non-standard value
i1 = 1, // This is a 1 bit integer value
i8 = 2, // This is an 8 bit integer value
i16 = 3, // This is a 16 bit integer value
i32 = 4, // This is a 32 bit integer value
i64 = 5, // This is a 64 bit integer value
i128 = 6, // This is a 128 bit integer value
Other = 1, // This is a non-standard value
i1 = 2, // This is a 1 bit integer value
i8 = 3, // This is an 8 bit integer value
i16 = 4, // This is a 16 bit integer value
i32 = 5, // This is a 32 bit integer value
i64 = 6, // This is a 64 bit integer value
i128 = 7, // This is a 128 bit integer value
FIRST_INTEGER_VALUETYPE = i1,
LAST_INTEGER_VALUETYPE = i128,
f16 = 7, // This is a 16 bit floating point value
f32 = 8, // This is a 32 bit floating point value
f64 = 9, // This is a 64 bit floating point value
f80 = 10, // This is an 80 bit floating point value
f128 = 11, // This is a 128 bit floating point value
ppcf128 = 12, // This is a PPC 128-bit floating point value
f16 = 8, // This is a 16 bit floating point value
f32 = 9, // This is a 32 bit floating point value
f64 = 10, // This is a 64 bit floating point value
f80 = 11, // This is an 80 bit floating point value
f128 = 12, // This is a 128 bit floating point value
ppcf128 = 13, // This is a PPC 128-bit floating point value
FIRST_FP_VALUETYPE = f16,
LAST_FP_VALUETYPE = ppcf128,
v2i1 = 13, // 2 x i1
v4i1 = 14, // 4 x i1
v8i1 = 15, // 8 x i1
v16i1 = 16, // 16 x i1
v32i1 = 17, // 32 x i1
v64i1 = 18, // 64 x i1
v512i1 = 19, // 512 x i1
v1024i1 = 20, // 1024 x i1
v2i1 = 14, // 2 x i1
v4i1 = 15, // 4 x i1
v8i1 = 16, // 8 x i1
v16i1 = 17, // 16 x i1
v32i1 = 18, // 32 x i1
v64i1 = 19, // 64 x i1
v512i1 = 20, // 512 x i1
v1024i1 = 21, // 1024 x i1
v1i8 = 21, // 1 x i8
v2i8 = 22, // 2 x i8
v4i8 = 23, // 4 x i8
v8i8 = 24, // 8 x i8
v16i8 = 25, // 16 x i8
v32i8 = 26, // 32 x i8
v64i8 = 27, // 64 x i8
v128i8 = 28, //128 x i8
v256i8 = 29, //256 x i8
v1i8 = 22, // 1 x i8
v2i8 = 23, // 2 x i8
v4i8 = 24, // 4 x i8
v8i8 = 25, // 8 x i8
v16i8 = 26, // 16 x i8
v32i8 = 27, // 32 x i8
v64i8 = 28, // 64 x i8
v128i8 = 29, //128 x i8
v256i8 = 30, //256 x i8
v1i16 = 30, // 1 x i16
v2i16 = 31, // 2 x i16
v4i16 = 32, // 4 x i16
v8i16 = 33, // 8 x i16
v16i16 = 34, // 16 x i16
v32i16 = 35, // 32 x i16
v64i16 = 36, // 64 x i16
v128i16 = 37, //128 x i16
v1i16 = 31, // 1 x i16
v2i16 = 32, // 2 x i16
v4i16 = 33, // 4 x i16
v8i16 = 34, // 8 x i16
v16i16 = 35, // 16 x i16
v32i16 = 36, // 32 x i16
v64i16 = 37, // 64 x i16
v128i16 = 38, //128 x i16
v1i32 = 38, // 1 x i32
v2i32 = 39, // 2 x i32
v4i32 = 40, // 4 x i32
v8i32 = 41, // 8 x i32
v16i32 = 42, // 16 x i32
v32i32 = 43, // 32 x i32
v64i32 = 44, // 64 x i32
v1i32 = 39, // 1 x i32
v2i32 = 40, // 2 x i32
v4i32 = 41, // 4 x i32
v8i32 = 42, // 8 x i32
v16i32 = 43, // 16 x i32
v32i32 = 44, // 32 x i32
v64i32 = 45, // 64 x i32
v1i64 = 45, // 1 x i64
v2i64 = 46, // 2 x i64
v4i64 = 47, // 4 x i64
v8i64 = 48, // 8 x i64
v16i64 = 49, // 16 x i64
v32i64 = 50, // 32 x i64
v1i64 = 46, // 1 x i64
v2i64 = 47, // 2 x i64
v4i64 = 48, // 4 x i64
v8i64 = 49, // 8 x i64
v16i64 = 50, // 16 x i64
v32i64 = 51, // 32 x i64
v1i128 = 51, // 1 x i128
v1i128 = 52, // 1 x i128
// Scalable integer types
nxv2i1 = 53, // n x 2 x i1
nxv4i1 = 54, // n x 4 x i1
nxv8i1 = 55, // n x 8 x i1
nxv16i1 = 56, // n x 16 x i1
nxv32i1 = 57, // n x 32 x i1
nxv1i8 = 58, // n x 1 x i8
nxv2i8 = 59, // n x 2 x i8
nxv4i8 = 60, // n x 4 x i8
nxv8i8 = 61, // n x 8 x i8
nxv16i8 = 62, // n x 16 x i8
nxv32i8 = 63, // n x 32 x i8
nxv1i16 = 64, // n x 1 x i16
nxv2i16 = 65, // n x 2 x i16
nxv4i16 = 66, // n x 4 x i16
nxv8i16 = 67, // n x 8 x i16
nxv16i16 = 68, // n x 16 x i16
nxv32i16 = 69, // n x 32 x i16
nxv1i32 = 70, // n x 1 x i32
nxv2i32 = 71, // n x 2 x i32
nxv4i32 = 72, // n x 4 x i32
nxv8i32 = 73, // n x 8 x i32
nxv16i32 = 74, // n x 16 x i32
nxv32i32 = 75, // n x 32 x i32
nxv1i64 = 76, // n x 1 x i64
nxv2i64 = 77, // n x 2 x i64
nxv4i64 = 78, // n x 4 x i64
nxv8i64 = 79, // n x 8 x i64
nxv16i64 = 80, // n x 16 x i64
nxv32i64 = 81, // n x 32 x i64
FIRST_INTEGER_VECTOR_VALUETYPE = v2i1,
LAST_INTEGER_VECTOR_VALUETYPE = v1i128,
LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64,
v2f16 = 52, // 2 x f16
v4f16 = 53, // 4 x f16
v8f16 = 54, // 8 x f16
v1f32 = 55, // 1 x f32
v2f32 = 56, // 2 x f32
v4f32 = 57, // 4 x f32
v8f32 = 58, // 8 x f32
v16f32 = 59, // 16 x f32
v1f64 = 60, // 1 x f64
v2f64 = 61, // 2 x f64
v4f64 = 62, // 4 x f64
v8f64 = 63, // 8 x f64
FIRST_INTEGER_SCALABLE_VALUETYPE = nxv2i1,
LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64,
v2f16 = 82, // 2 x f16
v4f16 = 83, // 4 x f16
v8f16 = 84, // 8 x f16
v1f32 = 85, // 1 x f32
v2f32 = 86, // 2 x f32
v4f32 = 87, // 4 x f32
v8f32 = 88, // 8 x f32
v16f32 = 89, // 16 x f32
v1f64 = 90, // 1 x f64
v2f64 = 91, // 2 x f64
v4f64 = 92, // 4 x f64
v8f64 = 93, // 8 x f64
nxv2f16 = 94, // n x 2 x f16
nxv4f16 = 95, // n x 4 x f16
nxv8f16 = 96, // n x 8 x f16
nxv1f32 = 97, // n x 1 x f32
nxv2f32 = 98, // n x 2 x f32
nxv4f32 = 99, // n x 4 x f32
nxv8f32 = 100, // n x 8 x f32
nxv16f32 = 101, // n x 16 x f32
nxv1f64 = 102, // n x 1 x f64
nxv2f64 = 103, // n x 2 x f64
nxv4f64 = 104, // n x 4 x f64
nxv8f64 = 105, // n x 8 x f64
FIRST_FP_VECTOR_VALUETYPE = v2f16,
LAST_FP_VECTOR_VALUETYPE = v8f64,
LAST_FP_VECTOR_VALUETYPE = nxv8f64,
FIRST_FP_SCALABLE_VALUETYPE = nxv2f16,
LAST_FP_SCALABLE_VALUETYPE = nxv8f64,
FIRST_VECTOR_VALUETYPE = v2i1,
LAST_VECTOR_VALUETYPE = v8f64,
LAST_VECTOR_VALUETYPE = nxv8f64,
x86mmx = 64, // This is an X86 MMX value
x86mmx = 106, // This is an X86 MMX value
Glue = 65, // This glues nodes together during pre-RA sched
Glue = 107, // This glues nodes together during pre-RA sched
isVoid = 66, // This has no value
isVoid = 108, // This has no value
Untyped = 67, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
Untyped = 109, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
FIRST_VALUETYPE = 0, // This is always the beginning of the list.
LAST_VALUETYPE = 68, // This always remains at the end of the list.
FIRST_VALUETYPE = 1, // This is always the beginning of the list.
LAST_VALUETYPE = 110, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
// This value must be a multiple of 32.
MAX_ALLOWED_VALUETYPE = 96,
MAX_ALLOWED_VALUETYPE = 128,
// A value of type llvm::TokenTy
token = 120,
token = 248,
// This is MDNode or MDString.
Metadata = 121,
Metadata = 249,
// An int value the size of the pointer of the current
// target to any address space. This must only be used internal to
// tblgen. Other than for overloading, we treat iPTRAny the same as iPTR.
iPTRAny = 122,
iPTRAny = 250,
// A vector with any length and element size. This is used
// for intrinsics that have overloadings based on vector types.
// This is only for tblgen's consumption!
vAny = 123,
vAny = 251,
// Any floating-point or vector floating-point value. This is used
// for intrinsics that have overloadings based on floating-point types.
// This is only for tblgen's consumption!
fAny = 124,
fAny = 252,
// An integer or vector integer value of any bit width. This is
// used for intrinsics that have overloadings based on integer bit widths.
// This is only for tblgen's consumption!
iAny = 125,
iAny = 253,
// An int value the size of the pointer of the current
// target. This should only be used internal to tblgen!
iPTR = 126,
iPTR = 254,
// Any type. This is used for intrinsics that have overloadings.
// This is only for tblgen's consumption!
Any = 127
Any = 255
};
SimpleValueType SimpleTy;
// A class to represent the number of elements in a vector
//
// For fixed-length vectors, the total number of elements is equal to 'Min'
// For scalable vectors, the total number of elements is a multiple of 'Min'
class ElementCount {
public:
unsigned Min;
bool Scalable;
ElementCount(unsigned Min, bool Scalable)
: Min(Min), Scalable(Scalable) {}
ElementCount operator*(unsigned RHS) {
return { Min * RHS, Scalable };
}
ElementCount& operator*=(unsigned RHS) {
Min *= RHS;
return *this;
}
ElementCount operator/(unsigned RHS) {
return { Min / RHS, Scalable };
}
ElementCount& operator/=(unsigned RHS) {
Min /= RHS;
return *this;
}
bool operator==(const ElementCount& RHS) {
return Min == RHS.Min && Scalable == RHS.Scalable;
}
};
constexpr MVT() : SimpleTy(INVALID_SIMPLE_VALUE_TYPE) {}
constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {}
@ -221,6 +312,15 @@ class MVT {
SimpleTy <= MVT::LAST_VECTOR_VALUETYPE);
}
/// Return true if this is a vector value type where the
/// runtime length is machine dependent
bool isScalableVector() const {
return ((SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VALUETYPE &&
SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VALUETYPE) ||
(SimpleTy >= MVT::FIRST_FP_SCALABLE_VALUETYPE &&
SimpleTy <= MVT::LAST_FP_SCALABLE_VALUETYPE));
}
/// Return true if this is a 16-bit vector type.
bool is16BitVector() const {
return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 ||
@ -318,7 +418,12 @@ class MVT {
case v32i1:
case v64i1:
case v512i1:
case v1024i1: return i1;
case v1024i1:
case nxv2i1:
case nxv4i1:
case nxv8i1:
case nxv16i1:
case nxv32i1: return i1;
case v1i8:
case v2i8:
case v4i8:
@ -327,7 +432,13 @@ class MVT {
case v32i8:
case v64i8:
case v128i8:
case v256i8: return i8;
case v256i8:
case nxv1i8:
case nxv2i8:
case nxv4i8:
case nxv8i8:
case nxv16i8:
case nxv32i8: return i8;
case v1i16:
case v2i16:
case v4i16:
@ -335,33 +446,63 @@ class MVT {
case v16i16:
case v32i16:
case v64i16:
case v128i16: return i16;
case v128i16:
case nxv1i16:
case nxv2i16:
case nxv4i16:
case nxv8i16:
case nxv16i16:
case nxv32i16: return i16;
case v1i32:
case v2i32:
case v4i32:
case v8i32:
case v16i32:
case v32i32:
case v64i32: return i32;
case v64i32:
case nxv1i32:
case nxv2i32:
case nxv4i32:
case nxv8i32:
case nxv16i32:
case nxv32i32: return i32;
case v1i64:
case v2i64:
case v4i64:
case v8i64:
case v16i64:
case v32i64: return i64;
case v32i64:
case nxv1i64:
case nxv2i64:
case nxv4i64:
case nxv8i64:
case nxv16i64:
case nxv32i64: return i64;
case v1i128: return i128;
case v2f16:
case v4f16:
case v8f16: return f16;
case v8f16:
case nxv2f16:
case nxv4f16:
case nxv8f16: return f16;
case v1f32:
case v2f32:
case v4f32:
case v8f32:
case v16f32: return f32;
case v16f32:
case nxv1f32:
case nxv2f32:
case nxv4f32:
case nxv8f32:
case nxv16f32: return f32;
case v1f64:
case v2f64:
case v4f64:
case v8f64: return f64;
case v8f64:
case nxv1f64:
case nxv2f64:
case nxv4f64:
case nxv8f64: return f64;
}
}
@ -382,13 +523,24 @@ class MVT {
case v32i8:
case v32i16:
case v32i32:
case v32i64: return 32;
case v32i64:
case nxv32i1:
case nxv32i8:
case nxv32i16:
case nxv32i32:
case nxv32i64: return 32;
case v16i1:
case v16i8:
case v16i16:
case v16i32:
case v16i64:
case v16f32: return 16;
case v16f32:
case nxv16i1:
case nxv16i8:
case nxv16i16:
case nxv16i32:
case nxv16i64:
case nxv16f32: return 16;
case v8i1:
case v8i8:
case v8i16:
@ -396,7 +548,15 @@ class MVT {
case v8i64:
case v8f16:
case v8f32:
case v8f64: return 8;
case v8f64:
case nxv8i1:
case nxv8i8:
case nxv8i16:
case nxv8i32:
case nxv8i64:
case nxv8f16:
case nxv8f32:
case nxv8f64: return 8;
case v4i1:
case v4i8:
case v4i16:
@ -404,7 +564,15 @@ class MVT {
case v4i64:
case v4f16:
case v4f32:
case v4f64: return 4;
case v4f64:
case nxv4i1:
case nxv4i8:
case nxv4i16:
case nxv4i32:
case nxv4i64:
case nxv4f16:
case nxv4f32:
case nxv4f64: return 4;
case v2i1:
case v2i8:
case v2i16:
@ -412,17 +580,35 @@ class MVT {
case v2i64:
case v2f16:
case v2f32:
case v2f64: return 2;
case v2f64:
case nxv2i1:
case nxv2i8:
case nxv2i16:
case nxv2i32:
case nxv2i64:
case nxv2f16:
case nxv2f32:
case nxv2f64: return 2;
case v1i8:
case v1i16:
case v1i32:
case v1i64:
case v1i128:
case v1f32:
case v1f64: return 1;
case v1f64:
case nxv1i8:
case nxv1i16:
case nxv1i32:
case nxv1i64:
case nxv1f32:
case nxv1f64: return 1;
}
}
MVT::ElementCount getVectorElementCount() const {
return { getVectorNumElements(), isScalableVector() };
}
unsigned getSizeInBits() const {
switch (SimpleTy) {
default:
@ -443,16 +629,23 @@ class MVT {
case Metadata:
llvm_unreachable("Value type is metadata.");
case i1 : return 1;
case v2i1: return 2;
case v4i1: return 4;
case v2i1:
case nxv2i1: return 2;
case v4i1:
case nxv4i1: return 4;
case i8 :
case v1i8:
case v8i1: return 8;
case v8i1:
case nxv1i8:
case nxv8i1: return 8;
case i16 :
case f16:
case v16i1:
case v2i8:
case v1i16: return 16;
case v1i16:
case nxv16i1:
case nxv2i8:
case nxv1i16: return 16;
case f32 :
case i32 :
case v32i1:
@ -460,7 +653,13 @@ class MVT {
case v2i16:
case v2f16:
case v1f32:
case v1i32: return 32;
case v1i32:
case nxv32i1:
case nxv4i8:
case nxv2i16:
case nxv1i32:
case nxv2f16:
case nxv1f32: return 32;
case x86mmx:
case f64 :
case i64 :
@ -471,7 +670,14 @@ class MVT {
case v1i64:
case v4f16:
case v2f32:
case v1f64: return 64;
case v1f64:
case nxv8i8:
case nxv4i16:
case nxv2i32:
case nxv1i64:
case nxv4f16:
case nxv2f32:
case nxv1f64: return 64;
case f80 : return 80;
case f128:
case ppcf128:
@ -483,29 +689,50 @@ class MVT {
case v1i128:
case v8f16:
case v4f32:
case v2f64: return 128;
case v2f64:
case nxv16i8:
case nxv8i16:
case nxv4i32:
case nxv2i64:
case nxv8f16:
case nxv4f32:
case nxv2f64: return 128;
case v32i8:
case v16i16:
case v8i32:
case v4i64:
case v8f32:
case v4f64: return 256;
case v4f64:
case nxv32i8:
case nxv16i16:
case nxv8i32:
case nxv4i64:
case nxv8f32:
case nxv4f64: return 256;
case v512i1:
case v64i8:
case v32i16:
case v16i32:
case v8i64:
case v16f32:
case v8f64: return 512;
case v8f64:
case nxv32i16:
case nxv16i32:
case nxv8i64:
case nxv16f32:
case nxv8f64: return 512;
case v1024i1:
case v128i8:
case v64i16:
case v32i32:
case v16i64: return 1024;
case v16i64:
case nxv32i32:
case nxv16i64: return 1024;
case v256i8:
case v128i16:
case v64i32:
case v32i64: return 2048;
case v32i64:
case nxv32i64: return 2048;
}
}
@ -659,6 +886,83 @@ class MVT {
return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
}
static MVT getScalableVectorVT(MVT VT, unsigned NumElements) {
switch(VT.SimpleTy) {
default:
break;
case MVT::i1:
if (NumElements == 2) return MVT::nxv2i1;
if (NumElements == 4) return MVT::nxv4i1;
if (NumElements == 8) return MVT::nxv8i1;
if (NumElements == 16) return MVT::nxv16i1;
if (NumElements == 32) return MVT::nxv32i1;
break;
case MVT::i8:
if (NumElements == 1) return MVT::nxv1i8;
if (NumElements == 2) return MVT::nxv2i8;
if (NumElements == 4) return MVT::nxv4i8;
if (NumElements == 8) return MVT::nxv8i8;
if (NumElements == 16) return MVT::nxv16i8;
if (NumElements == 32) return MVT::nxv32i8;
break;
case MVT::i16:
if (NumElements == 1) return MVT::nxv1i16;
if (NumElements == 2) return MVT::nxv2i16;
if (NumElements == 4) return MVT::nxv4i16;
if (NumElements == 8) return MVT::nxv8i16;
if (NumElements == 16) return MVT::nxv16i16;
if (NumElements == 32) return MVT::nxv32i16;
break;
case MVT::i32:
if (NumElements == 1) return MVT::nxv1i32;
if (NumElements == 2) return MVT::nxv2i32;
if (NumElements == 4) return MVT::nxv4i32;
if (NumElements == 8) return MVT::nxv8i32;
if (NumElements == 16) return MVT::nxv16i32;
if (NumElements == 32) return MVT::nxv32i32;
break;
case MVT::i64:
if (NumElements == 1) return MVT::nxv1i64;
if (NumElements == 2) return MVT::nxv2i64;
if (NumElements == 4) return MVT::nxv4i64;
if (NumElements == 8) return MVT::nxv8i64;
if (NumElements == 16) return MVT::nxv16i64;
if (NumElements == 32) return MVT::nxv32i64;
break;
case MVT::f16:
if (NumElements == 2) return MVT::nxv2f16;
if (NumElements == 4) return MVT::nxv4f16;
if (NumElements == 8) return MVT::nxv8f16;
break;
case MVT::f32:
if (NumElements == 1) return MVT::nxv1f32;
if (NumElements == 2) return MVT::nxv2f32;
if (NumElements == 4) return MVT::nxv4f32;
if (NumElements == 8) return MVT::nxv8f32;
if (NumElements == 16) return MVT::nxv16f32;
break;
case MVT::f64:
if (NumElements == 1) return MVT::nxv1f64;
if (NumElements == 2) return MVT::nxv2f64;
if (NumElements == 4) return MVT::nxv4f64;
if (NumElements == 8) return MVT::nxv8f64;
break;
}
return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
}
static MVT getVectorVT(MVT VT, unsigned NumElements, bool IsScalable) {
if (IsScalable)
return getScalableVectorVT(VT, NumElements);
return getVectorVT(VT, NumElements);
}
static MVT getVectorVT(MVT VT, MVT::ElementCount EC) {
if (EC.Scalable)
return getScalableVectorVT(VT, EC.Min);
return getVectorVT(VT, EC.Min);
}
/// Return the value type corresponding to the specified type. This returns
/// all pointers as iPTR. If HandleUnknown is true, unknown types are
/// returned as Other, otherwise they are invalid.
@ -709,6 +1013,14 @@ class MVT {
MVT::FIRST_FP_VECTOR_VALUETYPE,
(MVT::SimpleValueType)(MVT::LAST_FP_VECTOR_VALUETYPE + 1));
}
static mvt_range integer_scalable_vector_valuetypes() {
return mvt_range(MVT::FIRST_INTEGER_SCALABLE_VALUETYPE,
(MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VALUETYPE + 1));
}
static mvt_range fp_scalable_vector_valuetypes() {
return mvt_range(MVT::FIRST_FP_SCALABLE_VALUETYPE,
(MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VALUETYPE + 1));
}
/// @}
};

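A sketch of how the scalable additions above compose with the existing factory functions:

  MVT Fixed = MVT::getVectorVT(MVT::i32, 4);                      // v4i32
  MVT Scal  = MVT::getVectorVT(MVT::i32, 4, /*IsScalable=*/true); // nxv4i32
  MVT::ElementCount EC = Scal.getVectorElementCount();            // {Min=4, Scalable=true}
  MVT Same  = MVT::getVectorVT(MVT::i32, EC);                     // nxv4i32 again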
View File

@ -44,7 +44,7 @@ namespace llvm {
bool operator!=(EVT VT) const {
if (V.SimpleTy != VT.V.SimpleTy)
return true;
if (V.SimpleTy < 0)
if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
return LLVMTy != VT.LLVMTy;
return false;
}
@ -60,31 +60,48 @@ namespace llvm {
/// bits.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth) {
MVT M = MVT::getIntegerVT(BitWidth);
if (M.SimpleTy >= 0)
if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
return M;
return getExtendedIntegerVT(Context, BitWidth);
}
/// Returns the EVT that represents a vector NumElements in length, where
/// each element is of type VT.
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements) {
MVT M = MVT::getVectorVT(VT.V, NumElements);
if (M.SimpleTy >= 0)
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements,
bool IsScalable = false) {
MVT M = MVT::getVectorVT(VT.V, NumElements, IsScalable);
if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
return M;
assert(!IsScalable && "We don't support extended scalable types yet");
return getExtendedVectorVT(Context, VT, NumElements);
}
/// Returns the EVT that represents a vector EC.Min elements in length,
/// where each element is of type VT.
static EVT getVectorVT(LLVMContext &Context, EVT VT, MVT::ElementCount EC) {
MVT M = MVT::getVectorVT(VT.V, EC);
if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
return M;
assert (!EC.Scalable && "We don't support extended scalable types yet");
return getExtendedVectorVT(Context, VT, EC.Min);
}
/// Return a vector with the same number of elements as this vector, but
/// with the element type converted to an integer type with the same
/// bitwidth.
EVT changeVectorElementTypeToInteger() const {
if (!isSimple())
if (!isSimple()) {
assert (!isScalableVector() &&
"We don't support extended scalable types yet");
return changeExtendedVectorElementTypeToInteger();
}
MVT EltTy = getSimpleVT().getVectorElementType();
unsigned BitWidth = EltTy.getSizeInBits();
MVT IntTy = MVT::getIntegerVT(BitWidth);
MVT VecTy = MVT::getVectorVT(IntTy, getVectorNumElements());
assert(VecTy.SimpleTy >= 0 &&
MVT VecTy = MVT::getVectorVT(IntTy, getVectorNumElements(),
isScalableVector());
assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE &&
"Simple vector VT not representable by simple integer vector VT!");
return VecTy;
}
@ -104,7 +121,7 @@ namespace llvm {
/// Test if the given EVT is simple (as opposed to being extended).
bool isSimple() const {
return V.SimpleTy >= 0;
return V.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE;
}
/// Test if the given EVT is extended (as opposed to being simple).
@ -132,6 +149,17 @@ namespace llvm {
return isSimple() ? V.isVector() : isExtendedVector();
}
/// Return true if this is a vector type where the runtime
/// length is machine dependent
bool isScalableVector() const {
// FIXME: We don't support extended scalable types yet, because the
// matching IR type doesn't exist. Once it has been added, this can
// be changed to call isExtendedScalableVector.
if (!isSimple())
return false;
return V.isScalableVector();
}
/// Return true if this is a 16-bit vector type.
bool is16BitVector() const {
return isSimple() ? V.is16BitVector() : isExtended16BitVector();
@ -247,6 +275,17 @@ namespace llvm {
return getExtendedVectorNumElements();
}
// Given a (possibly scalable) vector type, return the ElementCount
MVT::ElementCount getVectorElementCount() const {
assert((isVector()) && "Invalid vector type!");
if (isSimple())
return V.getVectorElementCount();
assert(!isScalableVector() &&
"We don't support extended scalable types yet");
return {getExtendedVectorNumElements(), false};
}
/// Return the size of the specified value type in bits.
unsigned getSizeInBits() const {
if (isSimple())
@ -301,7 +340,17 @@ namespace llvm {
EVT widenIntegerVectorElementType(LLVMContext &Context) const {
EVT EltVT = getVectorElementType();
EltVT = EVT::getIntegerVT(Context, 2 * EltVT.getSizeInBits());
return EVT::getVectorVT(Context, EltVT, getVectorNumElements());
return EVT::getVectorVT(Context, EltVT, getVectorElementCount());
}
// Return a VT for a vector type with the same element type but
// half the number of elements. The type returned may be an
// extended type.
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const {
EVT EltVT = getVectorElementType();
auto EltCnt = getVectorElementCount();
assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!");
return EVT::getVectorVT(Context, EltVT, EltCnt / 2);
}
/// Returns true if the given vector is a power of 2.
@ -316,7 +365,8 @@ namespace llvm {
if (!isPow2VectorType()) {
unsigned NElts = getVectorNumElements();
unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts);
return EVT::getVectorVT(Context, getVectorElementType(), Pow2NElts);
return EVT::getVectorVT(Context, getVectorElementType(), Pow2NElts,
isScalableVector());
}
else {
return *this;

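The EVT side mirrors the MVT changes; a sketch, assuming an LLVMContext `Ctx`:

  EVT VT   = EVT::getVectorVT(Ctx, MVT::f32, 8, /*IsScalable=*/true); // nxv8f32
  EVT Half = VT.getHalfNumVectorElementsVT(Ctx);                      // nxv4f32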
View File

@ -19,101 +19,147 @@ class ValueType<int size, int value> {
int Value = value;
}
def OtherVT: ValueType<0 , 0>; // "Other" value
def i1 : ValueType<1 , 1>; // One bit boolean value
def i8 : ValueType<8 , 2>; // 8-bit integer value
def i16 : ValueType<16 , 3>; // 16-bit integer value
def i32 : ValueType<32 , 4>; // 32-bit integer value
def i64 : ValueType<64 , 5>; // 64-bit integer value
def i128 : ValueType<128, 6>; // 128-bit integer value
def f16 : ValueType<16 , 7>; // 16-bit floating point value
def f32 : ValueType<32 , 8>; // 32-bit floating point value
def f64 : ValueType<64 , 9>; // 64-bit floating point value
def f80 : ValueType<80 , 10>; // 80-bit floating point value
def f128 : ValueType<128, 11>; // 128-bit floating point value
def ppcf128: ValueType<128, 12>; // PPC 128-bit floating point value
def OtherVT: ValueType<0 , 1>; // "Other" value
def i1 : ValueType<1 , 2>; // One bit boolean value
def i8 : ValueType<8 , 3>; // 8-bit integer value
def i16 : ValueType<16 , 4>; // 16-bit integer value
def i32 : ValueType<32 , 5>; // 32-bit integer value
def i64 : ValueType<64 , 6>; // 64-bit integer value
def i128 : ValueType<128, 7>; // 128-bit integer value
def f16 : ValueType<16 , 8>; // 16-bit floating point value
def f32 : ValueType<32 , 9>; // 32-bit floating point value
def f64 : ValueType<64 , 10>; // 64-bit floating point value
def f80 : ValueType<80 , 11>; // 80-bit floating point value
def f128 : ValueType<128, 12>; // 128-bit floating point value
def ppcf128: ValueType<128, 13>; // PPC 128-bit floating point value
def v2i1 : ValueType<2 , 13>; // 2 x i1 vector value
def v4i1 : ValueType<4 , 14>; // 4 x i1 vector value
def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value
def v16i1 : ValueType<16, 16>; // 16 x i1 vector value
def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value
def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value
def v512i1 : ValueType<512, 19>; // 512 x i1 vector value
def v1024i1: ValueType<1024,20>; //1024 x i1 vector value
def v2i1 : ValueType<2 , 14>; // 2 x i1 vector value
def v4i1 : ValueType<4 , 15>; // 4 x i1 vector value
def v8i1 : ValueType<8 , 16>; // 8 x i1 vector value
def v16i1 : ValueType<16, 17>; // 16 x i1 vector value
def v32i1 : ValueType<32 , 18>; // 32 x i1 vector value
def v64i1 : ValueType<64 , 19>; // 64 x i1 vector value
def v512i1 : ValueType<512, 20>; // 512 x i1 vector value
def v1024i1: ValueType<1024,21>; //1024 x i1 vector value
def v1i8 : ValueType<16, 21>; // 1 x i8 vector value
def v2i8 : ValueType<16 , 22>; // 2 x i8 vector value
def v4i8 : ValueType<32 , 23>; // 4 x i8 vector value
def v8i8 : ValueType<64 , 24>; // 8 x i8 vector value
def v16i8 : ValueType<128, 25>; // 16 x i8 vector value
def v32i8 : ValueType<256, 26>; // 32 x i8 vector value
def v64i8 : ValueType<512, 27>; // 64 x i8 vector value
def v128i8 : ValueType<1024,28>; //128 x i8 vector value
def v256i8 : ValueType<2048,29>; //256 x i8 vector value
def v1i8 : ValueType<16, 22>; // 1 x i8 vector value
def v2i8 : ValueType<16 , 23>; // 2 x i8 vector value
def v4i8 : ValueType<32 , 24>; // 4 x i8 vector value
def v8i8 : ValueType<64 , 25>; // 8 x i8 vector value
def v16i8 : ValueType<128, 26>; // 16 x i8 vector value
def v32i8 : ValueType<256, 27>; // 32 x i8 vector value
def v64i8 : ValueType<512, 28>; // 64 x i8 vector value
def v128i8 : ValueType<1024,29>; //128 x i8 vector value
def v256i8 : ValueType<2048,30>; //256 x i8 vector value
def v1i16 : ValueType<16 , 30>; // 1 x i16 vector value
def v2i16 : ValueType<32 , 31>; // 2 x i16 vector value
def v4i16 : ValueType<64 , 32>; // 4 x i16 vector value
def v8i16 : ValueType<128, 33>; // 8 x i16 vector value
def v16i16 : ValueType<256, 34>; // 16 x i16 vector value
def v32i16 : ValueType<512, 35>; // 32 x i16 vector value
def v64i16 : ValueType<1024,36>; // 64 x i16 vector value
def v128i16: ValueType<2048,37>; //128 x i16 vector value
def v1i16 : ValueType<16 , 31>; // 1 x i16 vector value
def v2i16 : ValueType<32 , 32>; // 2 x i16 vector value
def v4i16 : ValueType<64 , 33>; // 4 x i16 vector value
def v8i16 : ValueType<128, 34>; // 8 x i16 vector value
def v16i16 : ValueType<256, 35>; // 16 x i16 vector value
def v32i16 : ValueType<512, 36>; // 32 x i16 vector value
def v64i16 : ValueType<1024,37>; // 64 x i16 vector value
def v128i16: ValueType<2048,38>; //128 x i16 vector value
def v1i32 : ValueType<32 , 38>; // 1 x i32 vector value
def v2i32 : ValueType<64 , 39>; // 2 x i32 vector value
def v4i32 : ValueType<128, 40>; // 4 x i32 vector value
def v8i32 : ValueType<256, 41>; // 8 x i32 vector value
def v16i32 : ValueType<512, 42>; // 16 x i32 vector value
def v32i32 : ValueType<1024,43>; // 32 x i32 vector value
def v64i32 : ValueType<2048,44>; // 64 x i32 vector value
def v1i32 : ValueType<32 , 39>; // 1 x i32 vector value
def v2i32 : ValueType<64 , 40>; // 2 x i32 vector value
def v4i32 : ValueType<128, 41>; // 4 x i32 vector value
def v8i32 : ValueType<256, 42>; // 8 x i32 vector value
def v16i32 : ValueType<512, 43>; // 16 x i32 vector value
def v32i32 : ValueType<1024,44>; // 32 x i32 vector value
def v64i32 : ValueType<2048,45>; // 64 x i32 vector value
def v1i64 : ValueType<64 , 45>; // 1 x i64 vector value
def v2i64 : ValueType<128, 46>; // 2 x i64 vector value
def v4i64 : ValueType<256, 47>; // 4 x i64 vector value
def v8i64 : ValueType<512, 48>; // 8 x i64 vector value
def v16i64 : ValueType<1024,49>; // 16 x i64 vector value
def v32i64 : ValueType<2048,50>; // 32 x i64 vector value
def v1i64 : ValueType<64 , 46>; // 1 x i64 vector value
def v2i64 : ValueType<128, 47>; // 2 x i64 vector value
def v4i64 : ValueType<256, 48>; // 4 x i64 vector value
def v8i64 : ValueType<512, 49>; // 8 x i64 vector value
def v16i64 : ValueType<1024,50>; // 16 x i64 vector value
def v32i64 : ValueType<2048,51>; // 32 x i64 vector value
def v1i128 : ValueType<128, 51>; // 1 x i128 vector value
def v1i128 : ValueType<128, 52>; // 1 x i128 vector value
def v2f16 : ValueType<32 , 52>; // 2 x f16 vector value
def v4f16 : ValueType<64 , 53>; // 4 x f16 vector value
def v8f16 : ValueType<128, 54>; // 8 x f16 vector value
def v1f32 : ValueType<32 , 55>; // 1 x f32 vector value
def v2f32 : ValueType<64 , 56>; // 2 x f32 vector value
def v4f32 : ValueType<128, 57>; // 4 x f32 vector value
def v8f32 : ValueType<256, 58>; // 8 x f32 vector value
def v16f32 : ValueType<512, 59>; // 16 x f32 vector value
def v1f64 : ValueType<64, 60>; // 1 x f64 vector value
def v2f64 : ValueType<128, 61>; // 2 x f64 vector value
def v4f64 : ValueType<256, 62>; // 4 x f64 vector value
def v8f64 : ValueType<512, 63>; // 8 x f64 vector value
def nxv2i1 : ValueType<2, 53>; // n x 2 x i1 vector value
def nxv4i1 : ValueType<4, 54>; // n x 4 x i1 vector value
def nxv8i1 : ValueType<8, 55>; // n x 8 x i1 vector value
def nxv16i1 : ValueType<16, 56>; // n x 16 x i1 vector value
def nxv32i1 : ValueType<32, 57>; // n x 32 x i1 vector value
def nxv1i8 : ValueType<8, 58>; // n x 1 x i8 vector value
def nxv2i8 : ValueType<16, 59>; // n x 2 x i8 vector value
def nxv4i8 : ValueType<32, 60>; // n x 4 x i8 vector value
def nxv8i8 : ValueType<64, 61>; // n x 8 x i8 vector value
def nxv16i8 : ValueType<128, 62>; // n x 16 x i8 vector value
def nxv32i8 : ValueType<256, 63>; // n x 32 x i8 vector value
def x86mmx : ValueType<64 , 64>; // X86 MMX value
def FlagVT : ValueType<0 , 65>; // Pre-RA sched glue
def isVoid : ValueType<0 , 66>; // Produces no value
def untyped: ValueType<8 , 67>; // Produces an untyped value
def token : ValueType<0 , 120>; // TokenTy
def MetadataVT: ValueType<0, 121>; // Metadata
def nxv1i16 : ValueType<16, 64>; // n x 1 x i16 vector value
def nxv2i16 : ValueType<32, 65>; // n x 2 x i16 vector value
def nxv4i16 : ValueType<64, 66>; // n x 4 x i16 vector value
def nxv8i16 : ValueType<128, 67>; // n x 8 x i16 vector value
def nxv16i16: ValueType<256, 68>; // n x 16 x i16 vector value
def nxv32i16: ValueType<512, 69>; // n x 32 x i16 vector value
def nxv1i32 : ValueType<32, 70>; // n x 1 x i32 vector value
def nxv2i32 : ValueType<64, 71>; // n x 2 x i32 vector value
def nxv4i32 : ValueType<128, 72>; // n x 4 x i32 vector value
def nxv8i32 : ValueType<256, 73>; // n x 8 x i32 vector value
def nxv16i32: ValueType<512, 74>; // n x 16 x i32 vector value
def nxv32i32: ValueType<1024,75>; // n x 32 x i32 vector value
def nxv1i64 : ValueType<64, 76>; // n x 1 x i64 vector value
def nxv2i64 : ValueType<128, 77>; // n x 2 x i64 vector value
def nxv4i64 : ValueType<256, 78>; // n x 4 x i64 vector value
def nxv8i64 : ValueType<512, 79>; // n x 8 x i64 vector value
def nxv16i64: ValueType<1024,80>; // n x 16 x i64 vector value
def nxv32i64: ValueType<2048,81>; // n x 32 x i64 vector value
def v2f16 : ValueType<32 , 82>; // 2 x f16 vector value
def v4f16 : ValueType<64 , 83>; // 4 x f16 vector value
def v8f16 : ValueType<128, 84>; // 8 x f16 vector value
def v1f32 : ValueType<32 , 85>; // 1 x f32 vector value
def v2f32 : ValueType<64 , 86>; // 2 x f32 vector value
def v4f32 : ValueType<128, 87>; // 4 x f32 vector value
def v8f32 : ValueType<256, 88>; // 8 x f32 vector value
def v16f32 : ValueType<512, 89>; // 16 x f32 vector value
def v1f64 : ValueType<64, 90>; // 1 x f64 vector value
def v2f64 : ValueType<128, 91>; // 2 x f64 vector value
def v4f64 : ValueType<256, 92>; // 4 x f64 vector value
def v8f64 : ValueType<512, 93>; // 8 x f64 vector value
def nxv2f16 : ValueType<32 , 94>; // n x 2 x f16 vector value
def nxv4f16 : ValueType<64 , 95>; // n x 4 x f16 vector value
def nxv8f16 : ValueType<128, 96>; // n x 8 x f16 vector value
def nxv1f32 : ValueType<32 , 97>; // n x 1 x f32 vector value
def nxv2f32 : ValueType<64 , 98>; // n x 2 x f32 vector value
def nxv4f32 : ValueType<128, 99>; // n x 4 x f32 vector value
def nxv8f32 : ValueType<256, 100>; // n x 8 x f32 vector value
def nxv16f32 : ValueType<512, 101>; // n x 16 x f32 vector value
def nxv1f64 : ValueType<64, 102>; // n x 1 x f64 vector value
def nxv2f64 : ValueType<128, 103>; // n x 2 x f64 vector value
def nxv4f64 : ValueType<256, 104>; // n x 4 x f64 vector value
def nxv8f64 : ValueType<512, 105>; // n x 8 x f64 vector value
def x86mmx : ValueType<64 , 106>; // X86 MMX value
def FlagVT : ValueType<0 , 107>; // Pre-RA sched glue
def isVoid : ValueType<0 , 108>; // Produces no value
def untyped: ValueType<8 , 109>; // Produces an untyped value
def token : ValueType<0 , 248>; // TokenTy
def MetadataVT: ValueType<0, 249>; // Metadata
// Pseudo valuetype mapped to the current pointer size to any address space.
// Should only be used in TableGen.
def iPTRAny : ValueType<0, 122>;
def iPTRAny : ValueType<0, 250>;
// Pseudo valuetype to represent "vector of any size"
def vAny : ValueType<0 , 123>;
def vAny : ValueType<0 , 251>;
// Pseudo valuetype to represent "float of any format"
def fAny : ValueType<0 , 124>;
def fAny : ValueType<0 , 252>;
// Pseudo valuetype to represent "integer of any bit width"
def iAny : ValueType<0 , 125>;
def iAny : ValueType<0 , 253>;
// Pseudo valuetype mapped to the current pointer size.
def iPTR : ValueType<0 , 126>;
def iPTR : ValueType<0 , 254>;
// Pseudo valuetype to represent "any type of any size".
def Any : ValueType<0 , 127>;
def Any : ValueType<0 , 255>;

View File

@ -59,9 +59,6 @@
/* Define to 1 if you have the <errno.h> header file. */
#cmakedefine HAVE_ERRNO_H ${HAVE_ERRNO_H}
/* Define to 1 if you have the <execinfo.h> header file. */
#cmakedefine HAVE_EXECINFO_H ${HAVE_EXECINFO_H}
/* Define to 1 if you have the <fcntl.h> header file. */
#cmakedefine HAVE_FCNTL_H ${HAVE_FCNTL_H}
@ -389,6 +386,9 @@
/* LLVM version information */
#cmakedefine LLVM_VERSION_INFO "${LLVM_VERSION_INFO}"
/* Whether tools show host and target info when invoked with --version */
#cmakedefine01 LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
/* Major version of the LLVM API */
#define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}

View File

@ -247,16 +247,11 @@ public:
/// DW_AT_call_line attribute in this DIE.
/// \param CallColumn filled in with non-zero if successful, zero if there is
/// no DW_AT_call_column attribute in this DIE.
/// \param CallDiscriminator filled in with non-zero if successful, zero if
/// there is no DW_AT_GNU_discriminator attribute in this DIE.
void getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
uint32_t &CallColumn) const;
uint32_t &CallColumn, uint32_t &CallDiscriminator) const;
/// Get inlined chain for a given address, rooted at the current DIE.
/// Returns empty chain if address is not contained in address range
/// of current DIE.
void
getInlinedChainForAddress(const uint64_t Address,
SmallVectorImpl<DWARFDie> &InlinedChain) const;
class attribute_iterator;
/// Get an iterator range to all attributes in the current DIE only.

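A sketch of the widened call-site query, assuming `Die` is an inlined-subroutine DWARFDie:

  uint32_t CallFile = 0, CallLine = 0, CallColumn = 0, CallDiscriminator = 0;
  Die.getCallerFrame(CallFile, CallLine, CallColumn, CallDiscriminator);
  // CallDiscriminator stays zero when DW_AT_GNU_discriminator is absent.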
View File

@ -31,6 +31,7 @@
#include <cstdint>
#include <memory>
#include <vector>
#include <map>
namespace llvm {
@ -134,6 +135,11 @@ class DWARFUnit {
uint64_t BaseAddr;
// The compile unit debug information entry items.
std::vector<DWARFDebugInfoEntry> DieArray;
// Map from range's start address to end address and corresponding DIE.
// IntervalMap does not support range removal; as a result, we use
// std::map::upper_bound for address range lookup.
std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap;
typedef iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>
die_iterator_range;
@ -183,6 +189,9 @@ public:
AddrOffsetSectionBase = Base;
}
// Recursively update address to Die map.
void updateAddressDieMap(DWARFDie Die);
void setRangesSection(StringRef RS, uint32_t Base) {
RangeSection = RS;
RangeSectionBase = Base;
@ -339,10 +348,10 @@ private:
/// it was actually constructed.
bool parseDWO();
/// getSubprogramForAddress - Returns subprogram DIE with address range
/// getSubroutineForAddress - Returns subprogram DIE with address range
/// encompassing the provided address. The pointer is alive as long as parsed
/// compile unit DIEs are not cleared.
DWARFDie getSubprogramForAddress(uint64_t Address);
DWARFDie getSubroutineForAddress(uint64_t Address);
};
} // end namespace llvm

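The comment on AddrDieMap explains the std::map choice; the lookup itself would follow the usual upper_bound-then-step-back pattern. A sketch, not the committed implementation:

  auto R = AddrDieMap.upper_bound(Address); // first range starting past Address
  if (R == AddrDieMap.begin())
    return DWARFDie();
  --R;                                      // now R->first <= Address
  if (Address >= R->second.first)           // Address is past the range's end
    return DWARFDie();
  return R->second.second;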
View File

@ -108,18 +108,16 @@ public:
bool hasSExtAttr() const;
/// Add attributes to an argument.
void addAttr(AttributeList AS);
void addAttrs(AttrBuilder &B);
void addAttr(Attribute::AttrKind Kind) {
addAttr(AttributeList::get(getContext(), getArgNo() + 1, Kind));
}
void addAttr(Attribute::AttrKind Kind);
void addAttr(Attribute Attr);
/// Remove attributes from an argument.
void removeAttr(AttributeList AS);
void removeAttr(Attribute::AttrKind Kind) {
removeAttr(AttributeList::get(getContext(), getArgNo() + 1, Kind));
}
void removeAttr(Attribute::AttrKind Kind);
/// Check if an argument has a given attribute.
bool hasAttribute(Attribute::AttrKind Kind) const;

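A sketch of the reworked Argument attribute API, assuming `Arg` is an Argument and `Ctx` its LLVMContext:

  Arg.addAttr(Attribute::NonNull);                    // kind-only overload
  Arg.addAttr(Attribute::getWithAlignment(Ctx, 16));  // full Attribute overload
  Arg.removeAttr(Attribute::NonNull);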
View File

@ -356,9 +356,6 @@ public:
AttributeList addAttributes(LLVMContext &C, unsigned Index,
AttributeList Attrs) const;
AttributeList addAttributes(LLVMContext &C, unsigned Index,
AttributeSet AS) const;
AttributeList addAttributes(LLVMContext &C, unsigned Index,
const AttrBuilder &B) const;

View File

@ -41,17 +41,14 @@ namespace llvm {
class MDNode;
/// This class represents a range of values.
///
class ConstantRange {
APInt Lower, Upper;
public:
/// Initialize a full (the default) or empty set for the specified bit width.
///
explicit ConstantRange(uint32_t BitWidth, bool isFullSet = true);
/// Initialize a range to hold the single specified value.
///
ConstantRange(APInt Value);
/// @brief Initialize a range of values explicitly. This will assert out if
@ -119,46 +116,36 @@ public:
bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const;
/// Return the lower value for this range.
///
const APInt &getLower() const { return Lower; }
/// Return the upper value for this range.
///
const APInt &getUpper() const { return Upper; }
/// Get the bit width of this ConstantRange.
///
uint32_t getBitWidth() const { return Lower.getBitWidth(); }
/// Return true if this set contains all of the elements possible
/// for this data-type.
///
bool isFullSet() const;
/// Return true if this set contains no members.
///
bool isEmptySet() const;
/// Return true if this set wraps around the top of the range.
/// For example: [100, 8).
///
bool isWrappedSet() const;
/// Return true if this set wraps around the INT_MIN of
/// its bitwidth. For example: i8 [120, 140).
///
bool isSignWrappedSet() const;
/// Return true if the specified value is in the set.
///
bool contains(const APInt &Val) const;
/// Return true if the other range is a subset of this one.
///
bool contains(const ConstantRange &CR) const;
/// If this set contains a single element, return it, otherwise return null.
///
const APInt *getSingleElement() const {
if (Upper == Lower + 1)
return &Lower;
@ -174,35 +161,27 @@ public:
}
/// Return true if this set contains exactly one member.
///
bool isSingleElement() const { return getSingleElement() != nullptr; }
/// Return the number of elements in this set.
///
APInt getSetSize() const;
/// Compare set size of this range with the range CR.
///
bool isSizeStrictlySmallerThanOf(const ConstantRange &CR) const;
/// Return the largest unsigned value contained in the ConstantRange.
///
APInt getUnsignedMax() const;
/// Return the smallest unsigned value contained in the ConstantRange.
///
APInt getUnsignedMin() const;
/// Return the largest signed value contained in the ConstantRange.
///
APInt getSignedMax() const;
/// Return the smallest signed value contained in the ConstantRange.
///
APInt getSignedMin() const;
/// Return true if this range is equal to another range.
///
bool operator==(const ConstantRange &CR) const {
return Lower == CR.Lower && Upper == CR.Upper;
}
@ -213,8 +192,8 @@ public:
/// Subtract the specified constant from the endpoints of this constant range.
ConstantRange subtract(const APInt &CI) const;
/// \brief Subtract the specified range from this range (aka relative
/// complement of the sets).
/// Subtract the specified range from this range (aka relative complement of
/// the sets).
ConstantRange difference(const ConstantRange &CR) const;
/// Return the range that results from the intersection of
@ -223,7 +202,6 @@ public:
/// smallest possible set size that does so. Because there may be two
/// intersections with the same set size, A.intersectWith(B) might not
/// be equal to B.intersectWith(A).
///
ConstantRange intersectWith(const ConstantRange &CR) const;
/// Return the range that results from the union of this range
@ -231,7 +209,6 @@ public:
/// elements of both sets, but may contain more. For example, [3, 9) union
/// [12,15) is [3, 15), which includes 9, 10, and 11, which were not included
/// in either set before.
///
ConstantRange unionWith(const ConstantRange &CR) const;
/// Return a new range representing the possible values resulting
@ -331,15 +308,12 @@ public:
ConstantRange lshr(const ConstantRange &Other) const;
/// Return a new range that is the logical not of the current set.
///
ConstantRange inverse() const;
/// Print out the bounds to a stream.
///
void print(raw_ostream &OS) const;
/// Allow printing from a debugger easily.
///
void dump() const;
};

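The order-dependence of intersectWith noted above deserves a concrete illustration; a sketch with wrapped 8-bit ranges:

  ConstantRange A(APInt(8, 3), APInt(8, 10)); // [3, 10)
  ConstantRange B(APInt(8, 7), APInt(8, 5));  // wrapped: [7, 5)
  // The true set intersection is {3,4} together with {7,8,9}; each call must
  // pick one covering range, so the two results may legitimately differ.
  ConstantRange X = A.intersectWith(B);
  ConstantRange Y = B.intersectWith(A);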
View File

@ -778,6 +778,9 @@ namespace llvm {
}
};
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_ISA_CONVERSION_FUNCTIONS(DIBuilder, LLVMDIBuilderRef)
} // end namespace llvm
#endif // LLVM_IR_DIBUILDER_H

View File

@ -2232,6 +2232,9 @@ public:
expr_op_iterator expr_op_end() const {
return expr_op_iterator(elements_end());
}
iterator_range<expr_op_iterator> expr_ops() const {
return {expr_op_begin(), expr_op_end()};
}
/// @}
bool isValid() const;
@ -2240,7 +2243,7 @@ public:
return MD->getMetadataID() == DIExpressionKind;
}
/// Is the first element a DW_OP_deref?.
/// Return whether the first element is a DW_OP_deref.
bool startsWithDeref() const {
return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref;
}

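A sketch of the new expr_ops() range accessor, assuming `Expr` is a DIExpression*:

  for (auto Op : Expr->expr_ops())
    if (Op.getOp() == dwarf::DW_OP_deref)
      ; // react to a deref operand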
View File

@ -273,10 +273,11 @@ public:
Value *getPointerOperand() { return getOperand(0); }
const Value *getPointerOperand() const { return getOperand(0); }
static unsigned getPointerOperandIndex() { return 0U; }
Type *getPointerOperandType() const { return getPointerOperand()->getType(); }
/// Returns the address space of the pointer operand.
unsigned getPointerAddressSpace() const {
return getPointerOperand()->getType()->getPointerAddressSpace();
return getPointerOperandType()->getPointerAddressSpace();
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
@ -397,10 +398,11 @@ public:
Value *getPointerOperand() { return getOperand(1); }
const Value *getPointerOperand() const { return getOperand(1); }
static unsigned getPointerOperandIndex() { return 1U; }
Type *getPointerOperandType() const { return getPointerOperand()->getType(); }
/// Returns the address space of the pointer operand.
unsigned getPointerAddressSpace() const {
return getPointerOperand()->getType()->getPointerAddressSpace();
return getPointerOperandType()->getPointerAddressSpace();
}
// Methods for support type inquiry through isa, cast, and dyn_cast:

View File

@ -30,6 +30,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstddef>
@ -133,6 +134,14 @@ public:
/// @}
};
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_ISA_CONVERSION_FUNCTIONS(Metadata, LLVMMetadataRef)
// Specialized opaque metadata conversions.
inline Metadata **unwrap(LLVMMetadataRef *MDs) {
return reinterpret_cast<Metadata**>(MDs);
}
#define HANDLE_METADATA(CLASS) class CLASS;
#include "llvm/IR/Metadata.def"

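With the conversion functions now in this header, C API code can round-trip metadata handles; a sketch, assuming `MD` is a Metadata*:

  LLVMMetadataRef Ref = wrap(MD);      // Metadata* -> opaque C handle
  Metadata *Back = unwrap(Ref);        // handle -> Metadata*
  MDNode *Node = unwrap<MDNode>(Ref);  // isa<>-checked unwrap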
View File

@ -160,7 +160,6 @@ private:
std::vector<ValueInfo> RefEdgeList;
protected:
/// GlobalValueSummary constructor.
GlobalValueSummary(SummaryKind K, GVFlags Flags, std::vector<ValueInfo> Refs)
: Kind(K), Flags(Flags), OriginalName(0), RefEdgeList(std::move(Refs)) {}
@ -221,7 +220,6 @@ class AliasSummary : public GlobalValueSummary {
GlobalValueSummary *AliaseeSummary;
public:
/// Summary constructors.
AliasSummary(GVFlags Flags, std::vector<ValueInfo> Refs)
: GlobalValueSummary(AliasKind, Flags, std::move(Refs)) {}
@ -297,7 +295,6 @@ private:
std::unique_ptr<TypeIdInfo> TIdInfo;
public:
/// Summary constructors.
FunctionSummary(GVFlags Flags, unsigned NumInsts, std::vector<ValueInfo> Refs,
std::vector<EdgeTy> CGEdges,
std::vector<GlobalValue::GUID> TypeTests,
@ -418,7 +415,6 @@ template <> struct DenseMapInfo<FunctionSummary::ConstVCall> {
class GlobalVarSummary : public GlobalValueSummary {
public:
/// Summary constructors.
GlobalVarSummary(GVFlags Flags, std::vector<ValueInfo> Refs)
: GlobalValueSummary(GlobalVarKind, Flags, std::move(Refs)) {}

View File

@ -267,15 +267,15 @@ inline cst_pred_ty<is_all_ones> m_AllOnes() {
}
inline api_pred_ty<is_all_ones> m_AllOnes(const APInt *&V) { return V; }
struct is_sign_bit {
bool isValue(const APInt &C) { return C.isSignBit(); }
struct is_sign_mask {
bool isValue(const APInt &C) { return C.isSignMask(); }
};
/// \brief Match an integer or vector with only the sign bit(s) set.
inline cst_pred_ty<is_sign_bit> m_SignBit() {
return cst_pred_ty<is_sign_bit>();
inline cst_pred_ty<is_sign_mask> m_SignMask() {
return cst_pred_ty<is_sign_mask>();
}
inline api_pred_ty<is_sign_bit> m_SignBit(const APInt *&V) { return V; }
inline api_pred_ty<is_sign_mask> m_SignMask(const APInt *&V) { return V; }
struct is_power2 {
bool isValue(const APInt &C) { return C.isPowerOf2(); }

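A sketch of the renamed matcher in use, assuming `V` is a Value*:

  Value *X;
  // Matches e.g. (xor X, 0x80..0), i.e. flipping only the sign bit.
  if (match(V, m_Xor(m_Value(X), m_SignMask())))
    ; // treat V as a sign flip of X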
View File

@ -61,9 +61,29 @@ public:
/// that also works with less standard-compliant compilers
void swap(Use &RHS);
/// Pointer traits for the UserRef PointerIntPair. This ensures we always
/// use the LSB regardless of pointer alignment on different targets.
struct UserRefPointerTraits {
static inline void *getAsVoidPointer(User *P) { return P; }
static inline User *getFromVoidPointer(void *P) {
return (User *)P;
}
enum { NumLowBitsAvailable = 1 };
};
// A type for the word following an array of hung-off Uses in memory, which is
// a pointer back to their User with the bottom bit set.
typedef PointerIntPair<User *, 1, unsigned> UserRef;
typedef PointerIntPair<User *, 1, unsigned, UserRefPointerTraits> UserRef;
/// Pointer traits for the Prev PointerIntPair. This ensures we always use
/// the two LSBs regardless of pointer alignment on different targets.
struct PrevPointerTraits {
static inline void *getAsVoidPointer(Use **P) { return P; }
static inline Use **getFromVoidPointer(void *P) {
return (Use **)P;
}
enum { NumLowBitsAvailable = 2 };
};
private:
/// Destructor - Only for zap()
@ -115,7 +135,7 @@ private:
Value *Val;
Use *Next;
PointerIntPair<Use **, 2, PrevPtrTag> Prev;
PointerIntPair<Use **, 2, PrevPtrTag, PrevPointerTraits> Prev;
void setPrev(Use **NewPrev) { Prev.setPointer(NewPrev); }

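The same trick applies to any PointerIntPair whose pointee may be under-aligned on some target: custom traits pin the number of tag bits instead of deriving them from the pointee's alignment. A sketch with a hypothetical `Foo`:

  struct FooPtrTraits {
    static inline void *getAsVoidPointer(Foo *P) { return P; }
    static inline Foo *getFromVoidPointer(void *P) { return (Foo *)P; }
    enum { NumLowBitsAvailable = 1 }; // promise exactly one tag bit
  };
  PointerIntPair<Foo *, 1, bool, FooPtrTraits> TaggedFoo;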
View File

@ -65,8 +65,8 @@ protected:
// Properties to be set by the target writer, used to configure asm printer.
//
/// Pointer size in bytes. Default is 4.
unsigned PointerSize = 4;
/// Code pointer size in bytes. Default is 4.
unsigned CodePointerSize = 4;
/// Size of the stack slot reserved for callee-saved registers, in bytes.
/// Default is same as pointer size.
@ -384,8 +384,8 @@ public:
explicit MCAsmInfo();
virtual ~MCAsmInfo();
/// Get the pointer size in bytes.
unsigned getPointerSize() const { return PointerSize; }
/// Get the code pointer size in bytes.
unsigned getCodePointerSize() const { return CodePointerSize; }
/// Get the callee-saved register stack slot
/// size in bytes.

View File

@ -128,6 +128,7 @@ public:
virtual void emitArch(unsigned Arch);
virtual void emitArchExtension(unsigned ArchExt);
virtual void emitObjectArch(unsigned Arch);
void emitTargetAttributes(const MCSubtargetInfo &STI);
virtual void finishAttributeSection();
virtual void emitInst(uint32_t Inst, char Suffix = '\0');

View File

@ -86,6 +86,10 @@ public:
FeatureBits = FeatureBits_;
}
bool hasFeature(unsigned Feature) const {
return FeatureBits[Feature];
}
protected:
/// Initialize the scheduling model and feature bits.
///

View File

@ -14,15 +14,20 @@
#ifndef LLVM_OBJECT_ARCHIVE_H
#define LLVM_OBJECT_ARCHIVE_H
#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Object/Binary.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
namespace llvm {
namespace object {
@ -32,25 +37,28 @@ class Archive;
class ArchiveMemberHeader {
public:
friend class Archive;
ArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr,
uint64_t Size, Error *Err);
// ArchiveMemberHeader() = default;
/// Get the name without looking up long names.
Expected<llvm::StringRef> getRawName() const;
Expected<StringRef> getRawName() const;
/// Get the name looking up long names.
Expected<llvm::StringRef> getName(uint64_t Size) const;
Expected<StringRef> getName(uint64_t Size) const;
/// Members are not larger than 4GB.
Expected<uint32_t> getSize() const;
Expected<sys::fs::perms> getAccessMode() const;
Expected<sys::TimePoint<std::chrono::seconds>> getLastModified() const;
llvm::StringRef getRawLastModified() const {
StringRef getRawLastModified() const {
return StringRef(ArMemHdr->LastModified,
sizeof(ArMemHdr->LastModified)).rtrim(' ');
}
Expected<unsigned> getUID() const;
Expected<unsigned> getGID() const;
@ -75,11 +83,13 @@ private:
class Archive : public Binary {
virtual void anchor();
public:
class Child {
friend Archive;
const Archive *Parent;
friend ArchiveMemberHeader;
const Archive *Parent;
ArchiveMemberHeader Header;
/// \brief Includes header but not padding byte.
StringRef Data;
@ -103,17 +113,22 @@ public:
Expected<StringRef> getName() const;
Expected<std::string> getFullName() const;
Expected<StringRef> getRawName() const { return Header.getRawName(); }
Expected<sys::TimePoint<std::chrono::seconds>> getLastModified() const {
return Header.getLastModified();
}
StringRef getRawLastModified() const {
return Header.getRawLastModified();
}
Expected<unsigned> getUID() const { return Header.getUID(); }
Expected<unsigned> getGID() const { return Header.getGID(); }
Expected<sys::fs::perms> getAccessMode() const {
return Header.getAccessMode();
}
/// \return the size of the archive member without the header or padding.
Expected<uint64_t> getSize() const;
/// \return the size in the archive header for this member.
@ -130,11 +145,12 @@ public:
class child_iterator {
Child C;
Error *E;
Error *E = nullptr;
public:
child_iterator() : C(Child(nullptr, nullptr, nullptr)), E(nullptr) {}
child_iterator() : C(Child(nullptr, nullptr, nullptr)) {}
child_iterator(const Child &C, Error *E) : C(C), E(E) {}
const Child *operator->() const { return &C; }
const Child &operator*() const { return C; }
@ -171,14 +187,15 @@ public:
uint32_t StringIndex; // Extra index to the string.
public:
bool operator ==(const Symbol &other) const {
return (Parent == other.Parent) && (SymbolIndex == other.SymbolIndex);
}
Symbol(const Archive *p, uint32_t symi, uint32_t stri)
: Parent(p)
, SymbolIndex(symi)
, StringIndex(stri) {}
bool operator ==(const Symbol &other) const {
return (Parent == other.Parent) && (SymbolIndex == other.SymbolIndex);
}
StringRef getName() const;
Expected<Child> getMember() const;
Symbol getNext() const;
@ -186,8 +203,10 @@ public:
class symbol_iterator {
Symbol symbol;
public:
symbol_iterator(const Symbol &s) : symbol(s) {}
const Symbol *operator->() const { return &symbol; }
const Symbol &operator*() const { return symbol; }
@ -264,7 +283,7 @@ private:
mutable std::vector<std::unique_ptr<MemoryBuffer>> ThinBuffers;
};
}
}
} // end namespace object
} // end namespace llvm
#endif
#endif // LLVM_OBJECT_ARCHIVE_H

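A sketch of walking an archive with the Error-carrying child_iterator shown above, assuming `A` is an Archive* and children(Error &) behaves as in the surrounding tree:

  Error Err = Error::success();
  for (const Archive::Child &C : A->children(Err)) {
    Expected<StringRef> Name = C.getName();
    if (!Name) {
      consumeError(Name.takeError());
      continue;
    }
    // use *Name ...
  }
  if (Err)
    report_fatal_error(std::move(Err)); // iteration itself failed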
View File

@ -15,10 +15,11 @@
#define LLVM_OBJECT_BINARY_H
#include "llvm/ADT/Triple.h"
#include "llvm/Object/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include <algorithm>
#include <memory>
#include <utility>
namespace llvm {
@ -29,9 +30,6 @@ namespace object {
class Binary {
private:
Binary() = delete;
Binary(const Binary &other) = delete;
unsigned int TypeID;
protected:
@ -80,6 +78,8 @@ protected:
}
public:
Binary() = delete;
Binary(const Binary &other) = delete;
virtual ~Binary();
StringRef getData() const;
@ -173,7 +173,7 @@ OwningBinary<T>::OwningBinary(std::unique_ptr<T> Bin,
std::unique_ptr<MemoryBuffer> Buf)
: Bin(std::move(Bin)), Buf(std::move(Buf)) {}
template <typename T> OwningBinary<T>::OwningBinary() {}
template <typename T> OwningBinary<T>::OwningBinary() = default;
template <typename T>
OwningBinary<T>::OwningBinary(OwningBinary &&Other)
@ -201,7 +201,9 @@ template <typename T> const T* OwningBinary<T>::getBinary() const {
}
Expected<OwningBinary<Binary>> createBinary(StringRef Path);
}
}
#endif
} // end namespace object
} // end namespace llvm
#endif // LLVM_OBJECT_BINARY_H

View File

@ -14,28 +14,39 @@
#ifndef LLVM_OBJECT_COFF_H
#define LLVM_OBJECT_COFF_H
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/CVDebugRecord.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <system_error>
namespace llvm {
template <typename T> class ArrayRef;
namespace object {
class ImportDirectoryEntryRef;
class BaseRelocRef;
class DelayImportDirectoryEntryRef;
class ExportDirectoryEntryRef;
class ImportDirectoryEntryRef;
class ImportedSymbolRef;
class BaseRelocRef;
typedef content_iterator<ImportDirectoryEntryRef> import_directory_iterator;
typedef content_iterator<DelayImportDirectoryEntryRef>
delay_import_directory_iterator;
typedef content_iterator<ExportDirectoryEntryRef> export_directory_iterator;
typedef content_iterator<ImportedSymbolRef> imported_symbol_iterator;
typedef content_iterator<BaseRelocRef> base_reloc_iterator;
using import_directory_iterator = content_iterator<ImportDirectoryEntryRef>;
using delay_import_directory_iterator =
content_iterator<DelayImportDirectoryEntryRef>;
using export_directory_iterator = content_iterator<ExportDirectoryEntryRef>;
using imported_symbol_iterator = content_iterator<ImportedSymbolRef>;
using base_reloc_iterator = content_iterator<BaseRelocRef>;
/// The DOS compatible header at the front of all PE/COFF executables.
struct dos_header {
@ -190,10 +201,10 @@ struct import_lookup_table_entry {
}
};
typedef import_lookup_table_entry<support::little32_t>
import_lookup_table_entry32;
typedef import_lookup_table_entry<support::little64_t>
import_lookup_table_entry64;
using import_lookup_table_entry32 =
import_lookup_table_entry<support::little32_t>;
using import_lookup_table_entry64 =
import_lookup_table_entry<support::little64_t>;
struct delay_import_directory_table_entry {
// dumpbin reports this field as "Characteristics" instead of "Attributes".
@ -226,8 +237,8 @@ union export_address_table_entry {
support::ulittle32_t ForwarderRVA;
};
typedef support::ulittle32_t export_name_pointer_table_entry;
typedef support::ulittle16_t export_ordinal_table_entry;
using export_name_pointer_table_entry = support::ulittle32_t;
using export_ordinal_table_entry = support::ulittle16_t;
struct StringTableOffset {
support::ulittle32_t Zeroes;
@ -250,8 +261,8 @@ struct coff_symbol {
uint8_t NumberOfAuxSymbols;
};
typedef coff_symbol<support::ulittle16_t> coff_symbol16;
typedef coff_symbol<support::ulittle32_t> coff_symbol32;
using coff_symbol16 = coff_symbol<support::ulittle16_t>;
using coff_symbol32 = coff_symbol<support::ulittle32_t>;
// Contains only common parts of coff_symbol16 and coff_symbol32.
struct coff_symbol_generic {
@ -264,9 +275,9 @@ struct coff_symbol_generic {
class COFFSymbolRef {
public:
COFFSymbolRef(const coff_symbol16 *CS) : CS16(CS), CS32(nullptr) {}
COFFSymbolRef(const coff_symbol32 *CS) : CS16(nullptr), CS32(CS) {}
COFFSymbolRef() : CS16(nullptr), CS32(nullptr) {}
COFFSymbolRef() = default;
COFFSymbolRef(const coff_symbol16 *CS) : CS16(CS) {}
COFFSymbolRef(const coff_symbol32 *CS) : CS32(CS) {}
const void *getRawPtr() const {
return CS16 ? static_cast<const void *>(CS16) : CS32;
@ -396,8 +407,8 @@ public:
private:
bool isSet() const { return CS16 || CS32; }
const coff_symbol16 *CS16;
const coff_symbol32 *CS32;
const coff_symbol16 *CS16 = nullptr;
const coff_symbol32 *CS32 = nullptr;
};
struct coff_section {
@ -418,6 +429,7 @@ struct coff_section {
return (Characteristics & COFF::IMAGE_SCN_LNK_NRELOC_OVFL) &&
NumberOfRelocations == UINT16_MAX;
}
uint32_t getAlignment() const {
// The IMAGE_SCN_TYPE_NO_PAD bit is a legacy way of getting to
// IMAGE_SCN_ALIGN_1BYTES.
@ -508,6 +520,7 @@ struct coff_import_header {
support::ulittle32_t SizeOfData;
support::ulittle16_t OrdinalHint;
support::ulittle16_t TypeInfo;
int getType() const { return TypeInfo & 0x3; }
int getNameType() const { return (TypeInfo >> 2) & 0x7; }
};
@ -518,6 +531,7 @@ struct coff_import_directory_table_entry {
support::ulittle32_t ForwarderChain;
support::ulittle32_t NameRVA;
support::ulittle32_t ImportAddressTableRVA;
bool isNull() const {
return ImportLookupTableRVA == 0 && TimeDateStamp == 0 &&
ForwarderChain == 0 && NameRVA == 0 && ImportAddressTableRVA == 0;
@ -532,6 +546,7 @@ struct coff_tls_directory {
IntTy AddressOfCallBacks;
support::ulittle32_t SizeOfZeroFill;
support::ulittle32_t Characteristics;
uint32_t getAlignment() const {
// Bits [20:24] contain the section alignment.
uint32_t Shift = (Characteristics & 0x00F00000) >> 20;
@ -541,8 +556,8 @@ struct coff_tls_directory {
}
};
typedef coff_tls_directory<support::little32_t> coff_tls_directory32;
typedef coff_tls_directory<support::little64_t> coff_tls_directory64;
using coff_tls_directory32 = coff_tls_directory<support::little32_t>;
using coff_tls_directory64 = coff_tls_directory<support::little64_t>;
struct coff_load_configuration32 {
support::ulittle32_t Characteristics;
@ -603,6 +618,7 @@ struct coff_base_reloc_block_header {
struct coff_base_reloc_block_entry {
support::ulittle16_t Data;
int getType() const { return Data >> 12; }
int getOffset() const { return Data & ((1 << 12) - 1); }
};
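// A sketch of how the packed Data field decodes (value hypothetical, not
// from the patch): for an entry with Data == 0xA123, getType() returns 0xA
// (the high four bits) and getOffset() returns 0x123 (the low twelve bits).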
@ -652,6 +668,7 @@ public:
return reinterpret_cast<uintptr_t>(SymbolTable32);
return uintptr_t(0);
}
uint16_t getMachine() const {
if (COFFHeader)
return COFFHeader->Machine;
@ -659,6 +676,7 @@ public:
return COFFBigObjHeader->Machine;
llvm_unreachable("no COFF header!");
}
uint16_t getSizeOfOptionalHeader() const {
if (COFFHeader)
return COFFHeader->isImportLibrary() ? 0
@ -668,6 +686,7 @@ public:
return 0;
llvm_unreachable("no COFF header!");
}
uint16_t getCharacteristics() const {
if (COFFHeader)
return COFFHeader->isImportLibrary() ? 0 : COFFHeader->Characteristics;
@ -677,6 +696,7 @@ public:
return 0;
llvm_unreachable("no COFF header!");
}
uint32_t getTimeDateStamp() const {
if (COFFHeader)
return COFFHeader->TimeDateStamp;
@ -684,6 +704,7 @@ public:
return COFFBigObjHeader->TimeDateStamp;
llvm_unreachable("no COFF header!");
}
uint32_t getNumberOfSections() const {
if (COFFHeader)
return COFFHeader->isImportLibrary() ? 0 : COFFHeader->NumberOfSections;
@ -691,6 +712,7 @@ public:
return COFFBigObjHeader->NumberOfSections;
llvm_unreachable("no COFF header!");
}
uint32_t getPointerToSymbolTable() const {
if (COFFHeader)
return COFFHeader->isImportLibrary() ? 0
@ -699,6 +721,7 @@ public:
return COFFBigObjHeader->PointerToSymbolTable;
llvm_unreachable("no COFF header!");
}
uint32_t getRawNumberOfSymbols() const {
if (COFFHeader)
return COFFHeader->isImportLibrary() ? 0 : COFFHeader->NumberOfSymbols;
@ -706,11 +729,13 @@ public:
return COFFBigObjHeader->NumberOfSymbols;
llvm_unreachable("no COFF header!");
}
uint32_t getNumberOfSymbols() const {
if (!SymbolTable16 && !SymbolTable32)
return 0;
return getRawNumberOfSymbols();
}
protected:
void moveSymbolNext(DataRefImpl &Symb) const override;
Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
@ -746,6 +771,7 @@ protected:
public:
COFFObjectFile(MemoryBufferRef Object, std::error_code &EC);
basic_symbol_iterator symbol_begin() const override;
basic_symbol_iterator symbol_end() const override;
section_iterator section_begin() const override;
@ -797,6 +823,7 @@ public:
std::error_code getDataDirectory(uint32_t index,
const data_directory *&Res) const;
std::error_code getSection(int32_t index, const coff_section *&Res) const;
template <typename coff_symbol_type>
std::error_code getSymbol(uint32_t Index,
const coff_symbol_type *&Res) const {
@ -821,6 +848,7 @@ public:
}
return object_error::parse_failed;
}
template <typename T>
std::error_code getAuxSymbol(uint32_t index, const T *&Res) const {
ErrorOr<COFFSymbolRef> s = getSymbol(index);
@ -829,6 +857,7 @@ public:
Res = reinterpret_cast<const T *>(s->getRawPtr());
return std::error_code();
}
std::error_code getSymbolName(COFFSymbolRef Symbol, StringRef &Res) const;
std::error_code getSymbolName(const coff_symbol_generic *Symbol,
StringRef &Res) const;
@ -885,7 +914,7 @@ public:
// The iterator for the import directory table.
class ImportDirectoryEntryRef {
public:
ImportDirectoryEntryRef() : OwningObject(nullptr) {}
ImportDirectoryEntryRef() = default;
ImportDirectoryEntryRef(const coff_import_directory_table_entry *Table,
uint32_t I, const COFFObjectFile *Owner)
: ImportTable(Table), Index(I), OwningObject(Owner) {}
@ -911,12 +940,12 @@ public:
private:
const coff_import_directory_table_entry *ImportTable;
uint32_t Index;
const COFFObjectFile *OwningObject;
const COFFObjectFile *OwningObject = nullptr;
};
class DelayImportDirectoryEntryRef {
public:
DelayImportDirectoryEntryRef() : OwningObject(nullptr) {}
DelayImportDirectoryEntryRef() = default;
DelayImportDirectoryEntryRef(const delay_import_directory_table_entry *T,
uint32_t I, const COFFObjectFile *Owner)
: Table(T), Index(I), OwningObject(Owner) {}
@ -936,13 +965,13 @@ public:
private:
const delay_import_directory_table_entry *Table;
uint32_t Index;
const COFFObjectFile *OwningObject;
const COFFObjectFile *OwningObject = nullptr;
};
// The iterator for the export directory table entry.
class ExportDirectoryEntryRef {
public:
ExportDirectoryEntryRef() : OwningObject(nullptr) {}
ExportDirectoryEntryRef() = default;
ExportDirectoryEntryRef(const export_directory_table_entry *Table, uint32_t I,
const COFFObjectFile *Owner)
: ExportTable(Table), Index(I), OwningObject(Owner) {}
@ -962,12 +991,12 @@ public:
private:
const export_directory_table_entry *ExportTable;
uint32_t Index;
const COFFObjectFile *OwningObject;
const COFFObjectFile *OwningObject = nullptr;
};
class ImportedSymbolRef {
public:
ImportedSymbolRef() : OwningObject(nullptr) {}
ImportedSymbolRef() = default;
ImportedSymbolRef(const import_lookup_table_entry32 *Entry, uint32_t I,
const COFFObjectFile *Owner)
: Entry32(Entry), Entry64(nullptr), Index(I), OwningObject(Owner) {}
@ -987,12 +1016,12 @@ private:
const import_lookup_table_entry32 *Entry32;
const import_lookup_table_entry64 *Entry64;
uint32_t Index;
const COFFObjectFile *OwningObject;
const COFFObjectFile *OwningObject = nullptr;
};
class BaseRelocRef {
public:
BaseRelocRef() : OwningObject(nullptr) {}
BaseRelocRef() = default;
BaseRelocRef(const coff_base_reloc_block_header *Header,
const COFFObjectFile *Owner)
: Header(Header), Index(0), OwningObject(Owner) {}
@ -1006,7 +1035,7 @@ public:
private:
const coff_base_reloc_block_header *Header;
uint32_t Index;
const COFFObjectFile *OwningObject;
const COFFObjectFile *OwningObject = nullptr;
};
// Corresponds to `_FPO_DATA` structure in the PE/COFF spec.
@ -1034,6 +1063,7 @@ struct FpoData {
};
} // end namespace object
} // end namespace llvm
#endif
#endif // LLVM_OBJECT_COFF_H

View File

@ -41,9 +41,9 @@ typedef support::ulittle32_t Word;
/// A reference to a string in the string table.
struct Str {
Word Offset;
Word Offset, Size;
StringRef get(StringRef Strtab) const {
return Strtab.data() + Offset;
return {Strtab.data() + Offset, Size};
}
};
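// A sketch of the new offset+size lookup (layout hypothetical): with
// Strtab == "foobar", a Str with Offset == 3 and Size == 3 resolves to "bar"
// via StringRef(Strtab.data() + 3, 3), no NUL terminator required.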
@ -59,6 +59,9 @@ template <typename T> struct Range {
/// table.
struct Module {
Word Begin, End;
/// The index of the first Uncommon for this Module.
Word UncBegin;
};
/// This is equivalent to an IR comdat.
@ -82,7 +85,8 @@ struct Symbol {
Word Flags;
enum FlagBits {
FB_visibility, // 2 bits
FB_undefined = FB_visibility + 2,
FB_has_uncommon = FB_visibility + 2,
FB_undefined,
FB_weak,
FB_common,
FB_indirect,
@ -94,10 +98,6 @@ struct Symbol {
FB_unnamed_addr,
FB_executable,
};
/// The index into the Uncommon table, or -1 if this symbol does not have an
/// Uncommon.
Word UncommonIndex;
};
/// This data structure contains rarely used symbol fields and is optionally
@ -249,15 +249,9 @@ public:
/// Reader::module_symbols().
class Reader::SymbolRef : public Symbol {
const storage::Symbol *SymI, *SymE;
const storage::Uncommon *UncI;
const Reader *R;
public:
SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE,
const Reader *R)
: SymI(SymI), SymE(SymE), R(R) {
read();
}
void read() {
if (SymI == SymE)
return;
@ -267,16 +261,24 @@ public:
ComdatIndex = SymI->ComdatIndex;
Flags = SymI->Flags;
uint32_t UncI = SymI->UncommonIndex;
if (UncI != -1u) {
const storage::Uncommon &Unc = R->Uncommons[UncI];
CommonSize = Unc.CommonSize;
CommonAlign = Unc.CommonAlign;
COFFWeakExternFallbackName = R->str(Unc.COFFWeakExternFallbackName);
if (Flags & (1 << storage::Symbol::FB_has_uncommon)) {
CommonSize = UncI->CommonSize;
CommonAlign = UncI->CommonAlign;
COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName);
}
}
public:
SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE,
const storage::Uncommon *UncI, const Reader *R)
: SymI(SymI), SymE(SymE), UncI(UncI), R(R) {
read();
}
void moveNext() {
++SymI;
if (Flags & (1 << storage::Symbol::FB_has_uncommon))
++UncI;
read();
}
@ -284,15 +286,16 @@ public:
};
inline Reader::symbol_range Reader::symbols() const {
return {SymbolRef(Symbols.begin(), Symbols.end(), this),
SymbolRef(Symbols.end(), Symbols.end(), this)};
return {SymbolRef(Symbols.begin(), Symbols.end(), Uncommons.begin(), this),
SymbolRef(Symbols.end(), Symbols.end(), nullptr, this)};
}
inline Reader::symbol_range Reader::module_symbols(unsigned I) const {
const storage::Module &M = Modules[I];
const storage::Symbol *MBegin = Symbols.begin() + M.Begin,
*MEnd = Symbols.begin() + M.End;
return {SymbolRef(MBegin, MEnd, this), SymbolRef(MEnd, MEnd, this)};
return {SymbolRef(MBegin, MEnd, Uncommons.begin() + M.UncBegin, this),
SymbolRef(MEnd, MEnd, nullptr, this)};
}
}

View File

@ -14,39 +14,46 @@
#ifndef LLVM_OBJECT_OBJECTFILE_H
#define LLVM_OBJECT_OBJECTFILE_H
#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstring>
#include <cassert>
#include <cstdint>
#include <memory>
#include <system_error>
namespace llvm {
class ARMAttributeParser;
namespace object {
class ObjectFile;
class COFFObjectFile;
class MachOObjectFile;
class WasmObjectFile;
class ObjectFile;
class SectionRef;
class SymbolRef;
class symbol_iterator;
class SectionRef;
typedef content_iterator<SectionRef> section_iterator;
class WasmObjectFile;
using section_iterator = content_iterator<SectionRef>;
/// This is a value type class that represents a single relocation in the list
/// of relocations in the object file.
class RelocationRef {
DataRefImpl RelocationPimpl;
const ObjectFile *OwningObject;
const ObjectFile *OwningObject = nullptr;
public:
RelocationRef() : OwningObject(nullptr) { }
RelocationRef() = default;
RelocationRef(DataRefImpl RelocationP, const ObjectFile *Owner);
bool operator==(const RelocationRef &Other) const;
@ -65,18 +72,19 @@ public:
DataRefImpl getRawDataRefImpl() const;
const ObjectFile *getObject() const;
};
typedef content_iterator<RelocationRef> relocation_iterator;
using relocation_iterator = content_iterator<RelocationRef>;
/// This is a value type class that represents a single section in the list of
/// sections in the object file.
class SectionRef {
friend class SymbolRef;
DataRefImpl SectionPimpl;
const ObjectFile *OwningObject;
const ObjectFile *OwningObject = nullptr;
public:
SectionRef() : OwningObject(nullptr) { }
SectionRef() = default;
SectionRef(DataRefImpl SectionP, const ObjectFile *Owner);
bool operator==(const SectionRef &Other) const;
@ -119,8 +127,6 @@ class SymbolRef : public BasicSymbolRef {
friend class SectionRef;
public:
SymbolRef() : BasicSymbolRef() {}
enum Type {
ST_Unknown, // Type not specified
ST_Data,
@ -130,6 +136,7 @@ public:
ST_Other
};
SymbolRef() = default;
SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner);
SymbolRef(const BasicSymbolRef &B) : BasicSymbolRef(B) {
assert(isa<ObjectFile>(BasicSymbolRef::getObject()));
@ -179,8 +186,6 @@ public:
/// to create.
class ObjectFile : public SymbolicFile {
virtual void anchor();
ObjectFile() = delete;
ObjectFile(const ObjectFile &other) = delete;
protected:
ObjectFile(unsigned int Type, MemoryBufferRef Source);
@ -198,6 +203,7 @@ protected:
// Implementations assume that the DataRefImpl is valid and has not been
// modified externally. It's UB otherwise.
friend class SymbolRef;
virtual Expected<StringRef> getSymbolName(DataRefImpl Symb) const = 0;
std::error_code printSymbolName(raw_ostream &OS,
DataRefImpl Symb) const override;
@ -211,6 +217,7 @@ protected:
// Same as above for SectionRef.
friend class SectionRef;
virtual void moveSectionNext(DataRefImpl &Sec) const = 0;
virtual std::error_code getSectionName(DataRefImpl Sec,
StringRef &Res) const = 0;
@ -242,12 +249,15 @@ protected:
uint64_t getSymbolValue(DataRefImpl Symb) const;
public:
ObjectFile() = delete;
ObjectFile(const ObjectFile &other) = delete;
uint64_t getCommonSymbolSize(DataRefImpl Symb) const {
assert(getSymbolFlags(Symb) & SymbolRef::SF_Common);
return getCommonSymbolSizeImpl(Symb);
}
typedef iterator_range<symbol_iterator> symbol_iterator_range;
using symbol_iterator_range = iterator_range<symbol_iterator>;
symbol_iterator_range symbols() const {
return symbol_iterator_range(symbol_begin(), symbol_end());
}
@ -255,7 +265,7 @@ public:
virtual section_iterator section_begin() const = 0;
virtual section_iterator section_end() const = 0;
typedef iterator_range<section_iterator> section_iterator_range;
using section_iterator_range = iterator_range<section_iterator>;
section_iterator_range sections() const {
return section_iterator_range(section_begin(), section_end());
}
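// The typedef-to-using change is source compatible; typical call sites
// (a sketch) keep working unchanged:
//   for (const SectionRef &Sec : Obj.sections()) { ... }
//   for (const SymbolRef &Sym : Obj.symbols()) { ... }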
@ -297,7 +307,6 @@ public:
return createObjectFile(Object, sys::fs::file_magic::unknown);
}
static inline bool classof(const Binary *v) {
return v->isObject();
}
@ -354,7 +363,6 @@ inline const ObjectFile *SymbolRef::getObject() const {
return cast<ObjectFile>(O);
}
/// SectionRef
inline SectionRef::SectionRef(DataRefImpl SectionP,
const ObjectFile *Owner)
@ -479,8 +487,8 @@ inline const ObjectFile *RelocationRef::getObject() const {
return OwningObject;
}
} // end namespace object
} // end namespace llvm
#endif
#endif // LLVM_OBJECT_OBJECTFILE_H

View File

@ -14,10 +14,19 @@
#ifndef LLVM_OBJECT_SYMBOLICFILE_H
#define LLVM_OBJECT_SYMBOLICFILE_H
#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/Binary.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cinttypes>
#include <utility>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <memory>
#include <system_error>
namespace llvm {
namespace object {
@ -29,6 +38,7 @@ union DataRefImpl {
uint32_t a, b;
} d;
uintptr_t p;
DataRefImpl() { std::memset(this, 0, sizeof(DataRefImpl)); }
};
@ -87,7 +97,7 @@ class SymbolicFile;
/// symbols in the object file.
class BasicSymbolRef {
DataRefImpl SymbolPimpl;
const SymbolicFile *OwningObject;
const SymbolicFile *OwningObject = nullptr;
public:
enum Flags : unsigned {
@ -108,7 +118,7 @@ public:
// (IR only)
};
BasicSymbolRef() : OwningObject(nullptr) { }
BasicSymbolRef() = default;
BasicSymbolRef(DataRefImpl SymbolP, const SymbolicFile *Owner);
bool operator==(const BasicSymbolRef &Other) const;
@ -125,12 +135,12 @@ public:
const SymbolicFile *getObject() const;
};
typedef content_iterator<BasicSymbolRef> basic_symbol_iterator;
using basic_symbol_iterator = content_iterator<BasicSymbolRef>;
class SymbolicFile : public Binary {
public:
~SymbolicFile() override;
SymbolicFile(unsigned int Type, MemoryBufferRef Source);
~SymbolicFile() override;
// virtual interface.
virtual void moveSymbolNext(DataRefImpl &Symb) const = 0;
@ -145,7 +155,7 @@ public:
virtual basic_symbol_iterator symbol_end() const = 0;
// convenience wrappers.
typedef iterator_range<basic_symbol_iterator> basic_symbol_iterator_range;
using basic_symbol_iterator_range = iterator_range<basic_symbol_iterator>;
basic_symbol_iterator_range symbols() const {
return basic_symbol_iterator_range(symbol_begin(), symbol_end());
}
@ -199,7 +209,7 @@ inline const SymbolicFile *BasicSymbolRef::getObject() const {
return OwningObject;
}
}
}
} // end namespace object
} // end namespace llvm
#endif
#endif // LLVM_OBJECT_SYMBOLICFILE_H

View File

@ -236,7 +236,7 @@ template <> struct MappingTraits<DWARFYAML::InitialLength> {
static void mapping(IO &IO, DWARFYAML::InitialLength &DWARF);
};
#define HANDLE_DW_TAG(unused, name) \
#define HANDLE_DW_TAG(unused, name, unused2, unused3) \
io.enumCase(value, "DW_TAG_" #name, dwarf::DW_TAG_##name);
template <> struct ScalarEnumerationTraits<dwarf::Tag> {
@ -266,7 +266,7 @@ template <> struct ScalarEnumerationTraits<dwarf::LineNumberExtendedOps> {
}
};
#define HANDLE_DW_AT(unused, name) \
#define HANDLE_DW_AT(unused, name, unused2, unused3) \
io.enumCase(value, "DW_AT_" #name, dwarf::DW_AT_##name);
template <> struct ScalarEnumerationTraits<dwarf::Attribute> {
@ -276,7 +276,7 @@ template <> struct ScalarEnumerationTraits<dwarf::Attribute> {
}
};
#define HANDLE_DW_FORM(unused, name) \
#define HANDLE_DW_FORM(unused, name, unused2, unused3) \
io.enumCase(value, "DW_FORM_" #name, dwarf::DW_FORM_##name);
template <> struct ScalarEnumerationTraits<dwarf::Form> {

View File

@ -93,11 +93,7 @@ template <typename PassName> Pass *callTargetMachineCtor(TargetMachine *TM) {
/// static RegisterPass<YourPassClassName> tmp("passopt", "My Pass Name");
///
/// This statement will cause your pass to be created by calling the default
/// constructor exposed by the pass. If you have a different constructor that
/// must be called, create a global constructor function (which takes the
/// arguments you need and returns a Pass*) and register your pass like this:
///
/// static RegisterPass<PassClassName> tmp("passopt", "My Name");
/// constructor exposed by the pass.
///
template <typename passName> struct RegisterPass : public PassInfo {
// Register Pass using default constructor...

View File

@ -78,33 +78,33 @@ ARM_ARCH("armv7-a", AK_ARMV7A, "7-A", "v7", ARMBuildAttrs::CPUArch::v7,
FK_NEON, ARM::AEK_DSP)
ARM_ARCH("armv7ve", AK_ARMV7VE, "7VE", "v7ve", ARMBuildAttrs::CPUArch::v7,
FK_NEON, (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
ARM::AEK_HWDIVARM | ARM::AEK_HWDIV | ARM::AEK_DSP))
ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP))
ARM_ARCH("armv7-r", AK_ARMV7R, "7-R", "v7r", ARMBuildAttrs::CPUArch::v7,
FK_NONE, (ARM::AEK_HWDIV | ARM::AEK_DSP))
FK_NONE, (ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP))
ARM_ARCH("armv7-m", AK_ARMV7M, "7-M", "v7m", ARMBuildAttrs::CPUArch::v7,
FK_NONE, ARM::AEK_HWDIV)
FK_NONE, ARM::AEK_HWDIVTHUMB)
ARM_ARCH("armv7e-m", AK_ARMV7EM, "7E-M", "v7em", ARMBuildAttrs::CPUArch::v7E_M,
FK_NONE, (ARM::AEK_HWDIV | ARM::AEK_DSP))
FK_NONE, (ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP))
ARM_ARCH("armv8-a", AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8_A,
FK_CRYPTO_NEON_FP_ARMV8,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIV | ARM::AEK_DSP | ARM::AEK_CRC))
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC))
ARM_ARCH("armv8.1-a", AK_ARMV8_1A, "8.1-A", "v8.1a",
ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIV | ARM::AEK_DSP | ARM::AEK_CRC))
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC))
ARM_ARCH("armv8.2-a", AK_ARMV8_2A, "8.2-A", "v8.2a",
ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIV | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS))
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS))
ARM_ARCH("armv8-r", AK_ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
FK_NEON_FP_ARMV8,
(ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIV |
(ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
ARM::AEK_DSP | ARM::AEK_CRC))
ARM_ARCH("armv8-m.base", AK_ARMV8MBaseline, "8-M.Baseline", "v8m.base",
ARMBuildAttrs::CPUArch::v8_M_Base, FK_NONE, ARM::AEK_HWDIV)
ARMBuildAttrs::CPUArch::v8_M_Base, FK_NONE, ARM::AEK_HWDIVTHUMB)
ARM_ARCH("armv8-m.main", AK_ARMV8MMainline, "8-M.Mainline", "v8m.main",
ARMBuildAttrs::CPUArch::v8_M_Main, FK_FPV5_D16, ARM::AEK_HWDIV)
ARMBuildAttrs::CPUArch::v8_M_Main, FK_FPV5_D16, ARM::AEK_HWDIVTHUMB)
// Non-standard Arch names.
ARM_ARCH("iwmmxt", AK_IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE,
FK_NONE, ARM::AEK_NONE)
@ -128,7 +128,7 @@ ARM_ARCH_EXT_NAME("crc", ARM::AEK_CRC, "+crc", "-crc")
ARM_ARCH_EXT_NAME("crypto", ARM::AEK_CRYPTO, "+crypto","-crypto")
ARM_ARCH_EXT_NAME("dsp", ARM::AEK_DSP, "+dsp", "-dsp")
ARM_ARCH_EXT_NAME("fp", ARM::AEK_FP, nullptr, nullptr)
ARM_ARCH_EXT_NAME("idiv", (ARM::AEK_HWDIVARM | ARM::AEK_HWDIV), nullptr, nullptr)
ARM_ARCH_EXT_NAME("idiv", (ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB), nullptr, nullptr)
ARM_ARCH_EXT_NAME("mp", ARM::AEK_MP, nullptr, nullptr)
ARM_ARCH_EXT_NAME("simd", ARM::AEK_SIMD, nullptr, nullptr)
ARM_ARCH_EXT_NAME("sec", ARM::AEK_SEC, nullptr, nullptr)
@ -147,9 +147,9 @@ ARM_ARCH_EXT_NAME("xscale", ARM::AEK_XSCALE, nullptr, nullptr)
#endif
ARM_HW_DIV_NAME("invalid", ARM::AEK_INVALID)
ARM_HW_DIV_NAME("none", ARM::AEK_NONE)
ARM_HW_DIV_NAME("thumb", ARM::AEK_HWDIV)
ARM_HW_DIV_NAME("thumb", ARM::AEK_HWDIVTHUMB)
ARM_HW_DIV_NAME("arm", ARM::AEK_HWDIVARM)
ARM_HW_DIV_NAME("arm,thumb", (ARM::AEK_HWDIVARM | ARM::AEK_HWDIV))
ARM_HW_DIV_NAME("arm,thumb", (ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB))
#undef ARM_HW_DIV_NAME
#ifndef ARM_CPU_NAME
@ -205,20 +205,20 @@ ARM_CPU_NAME("cortex-a5", AK_ARMV7A, FK_NEON_VFPV4, false,
(ARM::AEK_SEC | ARM::AEK_MP))
ARM_CPU_NAME("cortex-a7", AK_ARMV7A, FK_NEON_VFPV4, false,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIV))
ARM::AEK_HWDIVTHUMB))
ARM_CPU_NAME("cortex-a8", AK_ARMV7A, FK_NEON, true, ARM::AEK_SEC)
ARM_CPU_NAME("cortex-a9", AK_ARMV7A, FK_NEON_FP16, false, (ARM::AEK_SEC | ARM::AEK_MP))
ARM_CPU_NAME("cortex-a12", AK_ARMV7A, FK_NEON_VFPV4, false,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIV))
ARM::AEK_HWDIVTHUMB))
ARM_CPU_NAME("cortex-a15", AK_ARMV7A, FK_NEON_VFPV4, false,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIV))
ARM::AEK_HWDIVTHUMB))
ARM_CPU_NAME("cortex-a17", AK_ARMV7A, FK_NEON_VFPV4, false,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIV))
ARM::AEK_HWDIVTHUMB))
ARM_CPU_NAME("krait", AK_ARMV7A, FK_NEON_VFPV4, false,
(ARM::AEK_HWDIVARM | ARM::AEK_HWDIV))
(ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB))
ARM_CPU_NAME("cortex-r4", AK_ARMV7R, FK_NONE, true, ARM::AEK_NONE)
ARM_CPU_NAME("cortex-r4f", AK_ARMV7R, FK_VFPV3_D16, false, ARM::AEK_NONE)
ARM_CPU_NAME("cortex-r5", AK_ARMV7R, FK_VFPV3_D16, false,
@ -249,7 +249,7 @@ ARM_CPU_NAME("kryo", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("iwmmxt", AK_IWMMXT, FK_NONE, true, ARM::AEK_NONE)
ARM_CPU_NAME("xscale", AK_XSCALE, FK_NONE, true, ARM::AEK_NONE)
ARM_CPU_NAME("swift", AK_ARMV7S, FK_NEON_VFPV4, true,
(ARM::AEK_HWDIVARM | ARM::AEK_HWDIV))
(ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB))
// Invalid CPU
ARM_CPU_NAME("invalid", AK_INVALID, FK_INVALID, true, ARM::AEK_INVALID)
#undef ARM_CPU_NAME

View File

@ -47,7 +47,9 @@ template <class T, size_t Align = alignof(T)> class ArrayRecycler {
FreeList *Entry = Bucket[Idx];
if (!Entry)
return nullptr;
__asan_unpoison_memory_region(Entry, Capacity::get(Idx).getSize());
Bucket[Idx] = Entry->Next;
__msan_allocated_memory(Entry, Capacity::get(Idx).getSize());
return reinterpret_cast<T*>(Entry);
}
@ -59,6 +61,7 @@ template <class T, size_t Align = alignof(T)> class ArrayRecycler {
Bucket.resize(size_t(Idx) + 1);
Entry->Next = Bucket[Idx];
Bucket[Idx] = Entry;
__asan_poison_memory_region(Ptr, Capacity::get(Idx).getSize());
}
public:

View File

@ -162,6 +162,11 @@ public:
return ThisValue;
}
ValueType &operator*() {
assert(Array && !HasError);
return ThisValue;
}
IterType &operator+=(unsigned N) {
for (unsigned I = 0; I < N; ++I) {
// We are done with the current record, discard it so that we are

File diff suppressed because it is too large

View File

@ -46,7 +46,15 @@ enum LLVMConstants : uint32_t {
DWARF_VERSION = 4, // Default dwarf version we output.
DW_PUBTYPES_VERSION = 2, // Section version number for .debug_pubtypes.
DW_PUBNAMES_VERSION = 2, // Section version number for .debug_pubnames.
DW_ARANGES_VERSION = 2 // Section version number for .debug_aranges.
DW_ARANGES_VERSION = 2, // Section version number for .debug_aranges.
// Identifiers we use to distinguish vendor extensions.
DWARF_VENDOR_DWARF = 0, // Defined in v2 or later of the DWARF standard.
DWARF_VENDOR_APPLE = 1,
DWARF_VENDOR_BORLAND = 2,
DWARF_VENDOR_GNU = 3,
DWARF_VENDOR_GOOGLE = 4,
DWARF_VENDOR_LLVM = 5,
DWARF_VENDOR_MIPS = 6
};
// Special ID values that distinguish a CIE from a FDE in DWARF CFI.
@ -55,7 +63,7 @@ const uint32_t DW_CIE_ID = UINT32_MAX;
const uint64_t DW64_CIE_ID = UINT64_MAX;
enum Tag : uint16_t {
#define HANDLE_DW_TAG(ID, NAME) DW_TAG_##NAME = ID,
#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) DW_TAG_##NAME = ID,
#include "llvm/Support/Dwarf.def"
DW_TAG_lo_user = 0x4080,
DW_TAG_hi_user = 0xffff,
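// A sketch of the widened .def macro (operand values assumed for
// illustration): the old HANDLE_DW_TAG(0x0011, compile_unit) form becomes
// HANDLE_DW_TAG(0x0011, compile_unit, 2, DWARF), where the two new operands
// record the introducing DWARF version and the defining vendor.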
@ -92,20 +100,20 @@ inline bool isType(Tag T) {
/// Attributes.
enum Attribute : uint16_t {
#define HANDLE_DW_AT(ID, NAME) DW_AT_##NAME = ID,
#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) DW_AT_##NAME = ID,
#include "llvm/Support/Dwarf.def"
DW_AT_lo_user = 0x2000,
DW_AT_hi_user = 0x3fff,
};
enum Form : uint16_t {
#define HANDLE_DW_FORM(ID, NAME) DW_FORM_##NAME = ID,
#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) DW_FORM_##NAME = ID,
#include "llvm/Support/Dwarf.def"
DW_FORM_lo_user = 0x1f00, ///< Not specified by DWARF.
};
enum LocationAtom {
#define HANDLE_DW_OP(ID, NAME) DW_OP_##NAME = ID,
#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) DW_OP_##NAME = ID,
#include "llvm/Support/Dwarf.def"
DW_OP_lo_user = 0xe0,
DW_OP_hi_user = 0xff,
@ -113,7 +121,7 @@ enum LocationAtom {
};
enum TypeKind {
#define HANDLE_DW_ATE(ID, NAME) DW_ATE_##NAME = ID,
#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) DW_ATE_##NAME = ID,
#include "llvm/Support/Dwarf.def"
DW_ATE_lo_user = 0x80,
DW_ATE_hi_user = 0xff
@ -164,7 +172,7 @@ enum DefaultedMemberAttribute {
};
enum SourceLanguage {
#define HANDLE_DW_LANG(ID, NAME) DW_LANG_##NAME = ID,
#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) DW_LANG_##NAME = ID,
#include "llvm/Support/Dwarf.def"
DW_LANG_lo_user = 0x8000,
DW_LANG_hi_user = 0xffff
@ -220,8 +228,8 @@ enum LineNumberExtendedOps {
DW_LNE_hi_user = 0xff
};
enum LinerNumberEntryFormat {
#define HANDLE_DW_LNCT(ID, NAME) DW_DEFAULTED_##NAME = ID,
enum LineNumberEntryFormat {
#define HANDLE_DW_LNCT(ID, NAME) DW_LNCT_##NAME = ID,
#include "llvm/Support/Dwarf.def"
DW_LNCT_lo_user = 0x2000,
DW_LNCT_hi_user = 0x3fff,
@ -406,6 +414,40 @@ unsigned getAttributeEncoding(StringRef EncodingString);
unsigned getMacinfo(StringRef MacinfoString);
/// @}
/// \defgroup DwarfConstantsVersioning Dwarf version for constants
///
/// For constants defined by DWARF, returns the DWARF version when the constant
/// was first defined. For vendor extensions, if there is a version-related
/// policy for when to emit it, returns a version number for that policy.
/// Otherwise returns 0.
///
/// @{
unsigned TagVersion(Tag T);
unsigned AttributeVersion(Attribute A);
unsigned FormVersion(Form F);
unsigned OperationVersion(LocationAtom O);
unsigned AttributeEncodingVersion(TypeKind E);
unsigned LanguageVersion(SourceLanguage L);
/// @}
/// \defgroup DwarfConstantsVendor Dwarf "vendor" for constants
///
/// These functions return an identifier describing "who" defined the constant,
/// either the DWARF standard itself or the vendor who defined the extension.
///
/// @{
unsigned TagVendor(Tag T);
unsigned AttributeVendor(Attribute A);
unsigned FormVendor(Form F);
unsigned OperationVendor(LocationAtom O);
unsigned AttributeEncodingVendor(TypeKind E);
unsigned LanguageVendor(SourceLanguage L);
/// @}
/// Tells whether the specified form is defined in the specified version,
/// or is an extension if extensions are allowed.
bool isValidFormForVersion(Form F, unsigned Version, bool ExtensionsOk = true);
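// Hypothetical use of the new queries (names from the declarations above):
//   unsigned V = dwarf::TagVersion(dwarf::DW_TAG_compile_unit); // 2 (DWARF v2)
//   bool IsExt = dwarf::TagVendor(T) != dwarf::DWARF_VENDOR_DWARF;
//   if (V != 0 && V <= DwarfVersion) ... // standard constant, old enough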
/// \brief Returns the symbolic string representing Val when used as a value
/// for attribute Attr.
StringRef AttributeValueString(uint16_t Attr, unsigned Val);

View File

@ -276,32 +276,25 @@ protected:
// NewBB is split and now it has one successor. Update dominator tree to
// reflect this change.
template <class N, class GraphT>
void Split(DominatorTreeBaseByGraphTraits<GraphT> &DT,
typename GraphT::NodeRef NewBB) {
template <class N>
void Split(typename GraphTraits<N>::NodeRef NewBB) {
using GraphT = GraphTraits<N>;
using NodeRef = typename GraphT::NodeRef;
assert(std::distance(GraphT::child_begin(NewBB),
GraphT::child_end(NewBB)) == 1 &&
"NewBB should have a single successor!");
typename GraphT::NodeRef NewBBSucc = *GraphT::child_begin(NewBB);
NodeRef NewBBSucc = *GraphT::child_begin(NewBB);
std::vector<typename GraphT::NodeRef> PredBlocks;
typedef GraphTraits<Inverse<N>> InvTraits;
for (typename InvTraits::ChildIteratorType
PI = InvTraits::child_begin(NewBB),
PE = InvTraits::child_end(NewBB);
PI != PE; ++PI)
PredBlocks.push_back(*PI);
std::vector<NodeRef> PredBlocks;
for (const auto Pred : children<Inverse<N>>(NewBB))
PredBlocks.push_back(Pred);
assert(!PredBlocks.empty() && "No predblocks?");
bool NewBBDominatesNewBBSucc = true;
for (typename InvTraits::ChildIteratorType
PI = InvTraits::child_begin(NewBBSucc),
E = InvTraits::child_end(NewBBSucc);
PI != E; ++PI) {
typename InvTraits::NodeRef ND = *PI;
if (ND != NewBB && !DT.dominates(NewBBSucc, ND) &&
DT.isReachableFromEntry(ND)) {
for (const auto Pred : children<Inverse<N>>(NewBBSucc)) {
if (Pred != NewBB && !dominates(NewBBSucc, Pred) &&
isReachableFromEntry(Pred)) {
NewBBDominatesNewBBSucc = false;
break;
}
@ -312,7 +305,7 @@ protected:
NodeT *NewBBIDom = nullptr;
unsigned i = 0;
for (i = 0; i < PredBlocks.size(); ++i)
if (DT.isReachableFromEntry(PredBlocks[i])) {
if (isReachableFromEntry(PredBlocks[i])) {
NewBBIDom = PredBlocks[i];
break;
}
@ -324,18 +317,18 @@ protected:
return;
for (i = i + 1; i < PredBlocks.size(); ++i) {
if (DT.isReachableFromEntry(PredBlocks[i]))
NewBBIDom = DT.findNearestCommonDominator(NewBBIDom, PredBlocks[i]);
if (isReachableFromEntry(PredBlocks[i]))
NewBBIDom = findNearestCommonDominator(NewBBIDom, PredBlocks[i]);
}
// Create the new dominator tree node... and set the idom of NewBB.
DomTreeNodeBase<NodeT> *NewBBNode = DT.addNewBlock(NewBB, NewBBIDom);
DomTreeNodeBase<NodeT> *NewBBNode = addNewBlock(NewBB, NewBBIDom);
// If NewBB strictly dominates other blocks, then it is now the immediate
// dominator of NewBBSucc. Update the dominator tree as appropriate.
if (NewBBDominatesNewBBSucc) {
DomTreeNodeBase<NodeT> *NewBBSuccNode = DT.getNode(NewBBSucc);
DT.changeImmediateDominator(NewBBSuccNode, NewBBNode);
DomTreeNodeBase<NodeT> *NewBBSuccNode = getNode(NewBBSucc);
changeImmediateDominator(NewBBSuccNode, NewBBNode);
}
}
@ -379,7 +372,7 @@ public:
if (DomTreeNodes.size() != OtherDomTreeNodes.size())
return true;
for (const auto &DomTreeNode : this->DomTreeNodes) {
for (const auto &DomTreeNode : DomTreeNodes) {
NodeT *BB = DomTreeNode.first;
typename DomTreeNodeMapType::const_iterator OI =
OtherDomTreeNodes.find(BB);
@ -663,10 +656,9 @@ public:
/// tree to reflect this change.
void splitBlock(NodeT *NewBB) {
if (this->IsPostDominators)
this->Split<Inverse<NodeT *>, GraphTraits<Inverse<NodeT *>>>(*this,
NewBB);
Split<Inverse<NodeT *>>(NewBB);
else
this->Split<NodeT *, GraphTraits<NodeT *>>(*this, NewBB);
Split<NodeT *>(NewBB);
}
/// print - Convert to human readable form
@ -677,7 +669,7 @@ public:
o << "Inorder PostDominator Tree: ";
else
o << "Inorder Dominator Tree: ";
if (!this->DFSInfoValid)
if (!DFSInfoValid)
o << "DFSNumbers invalid: " << SlowQueries << " slow queries.";
o << "\n";
@ -712,12 +704,12 @@ protected:
// immediate dominator.
NodeT *IDom = getIDom(BB);
assert(IDom || this->DomTreeNodes[nullptr]);
assert(IDom || DomTreeNodes[nullptr]);
DomTreeNodeBase<NodeT> *IDomNode = getNodeForBlock(IDom);
// Add a new tree node for this NodeT, and link it as a child of
// IDomNode
return (this->DomTreeNodes[BB] = IDomNode->addChild(
return (DomTreeNodes[BB] = IDomNode->addChild(
llvm::make_unique<DomTreeNodeBase<NodeT>>(BB, IDomNode))).get();
}
@ -780,7 +772,7 @@ public:
template <class FT> void recalculate(FT &F) {
typedef GraphTraits<FT *> TraitsTy;
reset();
this->Vertex.push_back(nullptr);
Vertex.push_back(nullptr);
if (!this->IsPostDominators) {
// Initialize root

View File

@ -143,10 +143,9 @@ public:
void writeNodes() {
// Loop over the graph, printing it out...
for (node_iterator I = GTraits::nodes_begin(G), E = GTraits::nodes_end(G);
I != E; ++I)
if (!isNodeHidden(*I))
writeNode(*I);
for (const auto Node : nodes<GraphType>(G))
if (!isNodeHidden(Node))
writeNode(Node);
}
bool isNodeHidden(NodeRef Node) {

View File

@ -39,100 +39,123 @@ class raw_ostream;
class LLT {
public:
enum TypeKind : uint16_t {
Invalid,
Scalar,
Pointer,
Vector,
};
/// Get a low-level scalar or aggregate "bag of bits".
static LLT scalar(unsigned SizeInBits) {
assert(SizeInBits > 0 && "invalid scalar size");
return LLT{Scalar, 1, SizeInBits};
return LLT{/*isPointer=*/false, /*isVector=*/false, /*NumElements=*/0,
SizeInBits, /*AddressSpace=*/0};
}
/// Get a low-level pointer in the given address space (defaulting to 0).
static LLT pointer(uint16_t AddressSpace, unsigned SizeInBits) {
return LLT{Pointer, AddressSpace, SizeInBits};
assert(SizeInBits > 0 && "invalid pointer size");
return LLT{/*isPointer=*/true, /*isVector=*/false, /*NumElements=*/0,
SizeInBits, AddressSpace};
}
/// Get a low-level vector of some number of elements and element width.
/// \p NumElements must be at least 2.
static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits) {
assert(NumElements > 1 && "invalid number of vector elements");
return LLT{Vector, NumElements, ScalarSizeInBits};
assert(ScalarSizeInBits > 0 && "invalid vector element size");
return LLT{/*isPointer=*/false, /*isVector=*/true, NumElements,
ScalarSizeInBits, /*AddressSpace=*/0};
}
/// Get a low-level vector of some number of elements and element type.
static LLT vector(uint16_t NumElements, LLT ScalarTy) {
assert(NumElements > 1 && "invalid number of vector elements");
assert(ScalarTy.isScalar() && "invalid vector element type");
return LLT{Vector, NumElements, ScalarTy.getSizeInBits()};
assert(!ScalarTy.isVector() && "invalid vector element type");
return LLT{ScalarTy.isPointer(), /*isVector=*/true, NumElements,
ScalarTy.getSizeInBits(),
ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0};
}
explicit LLT(TypeKind Kind, uint16_t NumElements, unsigned SizeInBits)
: SizeInBits(SizeInBits), ElementsOrAddrSpace(NumElements), Kind(Kind) {
assert((Kind != Vector || ElementsOrAddrSpace > 1) &&
"invalid number of vector elements");
explicit LLT(bool isPointer, bool isVector, uint16_t NumElements,
unsigned SizeInBits, unsigned AddressSpace) {
init(isPointer, isVector, NumElements, SizeInBits, AddressSpace);
}
explicit LLT() : SizeInBits(0), ElementsOrAddrSpace(0), Kind(Invalid) {}
explicit LLT() : IsPointer(false), IsVector(false), RawData(0) {}
explicit LLT(MVT VT);
bool isValid() const { return Kind != Invalid; }
bool isValid() const { return RawData != 0; }
bool isScalar() const { return Kind == Scalar; }
bool isScalar() const { return isValid() && !IsPointer && !IsVector; }
bool isPointer() const { return Kind == Pointer; }
bool isPointer() const { return isValid() && IsPointer && !IsVector; }
bool isVector() const { return Kind == Vector; }
bool isVector() const { return isValid() && IsVector; }
/// Returns the number of elements in a vector LLT. Must only be called on
/// vector types.
uint16_t getNumElements() const {
assert(isVector() && "cannot get number of elements on scalar/aggregate");
return ElementsOrAddrSpace;
assert(IsVector && "cannot get number of elements on scalar/aggregate");
if (!IsPointer)
return getFieldValue(VectorElementsFieldInfo);
else
return getFieldValue(PointerVectorElementsFieldInfo);
}
/// Returns the total size of the type. Must only be called on sized types.
unsigned getSizeInBits() const {
if (isPointer() || isScalar())
return SizeInBits;
return SizeInBits * ElementsOrAddrSpace;
return getScalarSizeInBits();
return getScalarSizeInBits() * getNumElements();
}
unsigned getScalarSizeInBits() const {
return SizeInBits;
assert(RawData != 0 && "Invalid Type");
if (!IsVector) {
if (!IsPointer)
return getFieldValue(ScalarSizeFieldInfo);
else
return getFieldValue(PointerSizeFieldInfo);
} else {
if (!IsPointer)
return getFieldValue(VectorSizeFieldInfo);
else
return getFieldValue(PointerVectorSizeFieldInfo);
}
}
unsigned getAddressSpace() const {
assert(isPointer() && "cannot get address space of non-pointer type");
return ElementsOrAddrSpace;
assert(RawData != 0 && "Invalid Type");
assert(IsPointer && "cannot get address space of non-pointer type");
if (!IsVector)
return getFieldValue(PointerAddressSpaceFieldInfo);
else
return getFieldValue(PointerVectorAddressSpaceFieldInfo);
}
/// Returns the vector's element type. Only valid for vector types.
LLT getElementType() const {
assert(isVector() && "cannot get element type of scalar/aggregate");
return scalar(SizeInBits);
if (IsPointer)
return pointer(getAddressSpace(), getScalarSizeInBits());
else
return scalar(getScalarSizeInBits());
}
/// Get a low-level type with half the size of the original, by halving the
/// size of the scalar type involved. For example `s32` will become `s16`,
/// `<2 x s32>` will become `<2 x s16>`.
LLT halfScalarSize() const {
assert(!isPointer() && getScalarSizeInBits() > 1 &&
assert(!IsPointer && getScalarSizeInBits() > 1 &&
getScalarSizeInBits() % 2 == 0 && "cannot half size of this type");
return LLT{Kind, ElementsOrAddrSpace, SizeInBits / 2};
return LLT{/*isPointer=*/false, IsVector ? true : false,
IsVector ? getNumElements() : (uint16_t)0,
getScalarSizeInBits() / 2, /*AddressSpace=*/0};
}
/// Get a low-level type with twice the size of the original, by doubling the
/// size of the scalar type involved. For example `s32` will become `s64`,
/// `<2 x s32>` will become `<2 x s64>`.
LLT doubleScalarSize() const {
assert(!isPointer() && "cannot change size of this type");
return LLT{Kind, ElementsOrAddrSpace, SizeInBits * 2};
assert(!IsPointer && "cannot change size of this type");
return LLT{/*isPointer=*/false, IsVector ? true : false,
IsVector ? getNumElements() : (uint16_t)0,
getScalarSizeInBits() * 2, /*AddressSpace=*/0};
}
/// Get a low-level type with half the size of the original, by halving the
@ -140,13 +163,13 @@ public:
/// a vector type with an even number of elements. For example `<4 x s32>`
/// will become `<2 x s32>`, `<2 x s32>` will become `s32`.
LLT halfElements() const {
assert(isVector() && ElementsOrAddrSpace % 2 == 0 &&
"cannot half odd vector");
if (ElementsOrAddrSpace == 2)
return scalar(SizeInBits);
assert(isVector() && getNumElements() % 2 == 0 && "cannot half odd vector");
if (getNumElements() == 2)
return scalar(getScalarSizeInBits());
return LLT{Vector, static_cast<uint16_t>(ElementsOrAddrSpace / 2),
SizeInBits};
return LLT{/*isPointer=*/false, /*isVector=*/true,
(uint16_t)(getNumElements() / 2), getScalarSizeInBits(),
/*AddressSpace=*/0};
}
/// Get a low-level type with twice the size of the original, by doubling the
@ -154,25 +177,105 @@ public:
/// a vector type. For example `<2 x s32>` will become `<4 x s32>`. Doubling
/// the number of elements in sN produces <2 x sN>.
LLT doubleElements() const {
assert(!isPointer() && "cannot double elements in pointer");
return LLT{Vector, static_cast<uint16_t>(ElementsOrAddrSpace * 2),
SizeInBits};
return LLT{IsPointer ? true : false, /*isVector=*/true,
(uint16_t)(getNumElements() * 2), getScalarSizeInBits(),
IsPointer ? getAddressSpace() : 0};
}
void print(raw_ostream &OS) const;
bool operator==(const LLT &RHS) const {
return Kind == RHS.Kind && SizeInBits == RHS.SizeInBits &&
ElementsOrAddrSpace == RHS.ElementsOrAddrSpace;
return IsPointer == RHS.IsPointer && IsVector == RHS.IsVector &&
RHS.RawData == RawData;
}
bool operator!=(const LLT &RHS) const { return !(*this == RHS); }
friend struct DenseMapInfo<LLT>;
private:
unsigned SizeInBits;
uint16_t ElementsOrAddrSpace;
TypeKind Kind;
/// LLT is packed into 64 bits as follows:
/// isPointer : 1
/// isVector : 1
/// with 62 bits remaining for Kind-specific data, packed in bitfields
/// as described below. As there isn't a simple portable way to pack bits
/// into bitfields, the different fields in the packed structure are
/// described in static const *Field variables. Each of these variables
/// is a 2-element array, with the first element describing the bitfield size
/// and the second element describing the bitfield offset.
typedef int BitFieldInfo[2];
///
/// This is how the bitfields are packed per Kind:
/// * Invalid:
/// gets encoded as RawData == 0, as that is an invalid encoding, since for
/// valid encodings, SizeInBits/SizeOfElement must be larger than 0.
/// * Non-pointer scalar (isPointer == 0 && isVector == 0):
/// SizeInBits: 32;
static const constexpr BitFieldInfo ScalarSizeFieldInfo{32, 0};
/// * Pointer (isPointer == 1 && isVector == 0):
/// SizeInBits: 16;
/// AddressSpace: 23;
static const constexpr BitFieldInfo PointerSizeFieldInfo{16, 0};
static const constexpr BitFieldInfo PointerAddressSpaceFieldInfo{
23, PointerSizeFieldInfo[0] + PointerSizeFieldInfo[1]};
/// * Vector-of-non-pointer (isPointer == 0 && isVector == 1):
/// NumElements: 16;
/// SizeOfElement: 32;
static const constexpr BitFieldInfo VectorElementsFieldInfo{16, 0};
static const constexpr BitFieldInfo VectorSizeFieldInfo{
32, VectorElementsFieldInfo[0] + VectorElementsFieldInfo[1]};
/// * Vector-of-pointer (isPointer == 1 && isVector == 1):
/// NumElements: 16;
/// SizeOfElement: 16;
/// AddressSpace: 23;
static const constexpr BitFieldInfo PointerVectorElementsFieldInfo{16, 0};
static const constexpr BitFieldInfo PointerVectorSizeFieldInfo{
16,
PointerVectorElementsFieldInfo[1] + PointerVectorElementsFieldInfo[0]};
static const constexpr BitFieldInfo PointerVectorAddressSpaceFieldInfo{
23, PointerVectorSizeFieldInfo[1] + PointerVectorSizeFieldInfo[0]};
uint64_t IsPointer : 1;
uint64_t IsVector : 1;
uint64_t RawData : 62;
static uint64_t getMask(const BitFieldInfo FieldInfo) {
const int FieldSizeInBits = FieldInfo[0];
return (((uint64_t)1) << FieldSizeInBits) - 1;
}
static uint64_t maskAndShift(uint64_t Val, uint64_t Mask, uint8_t Shift) {
assert(Val <= Mask && "Value too large for field");
return (Val & Mask) << Shift;
}
static uint64_t maskAndShift(uint64_t Val, const BitFieldInfo FieldInfo) {
return maskAndShift(Val, getMask(FieldInfo), FieldInfo[1]);
}
uint64_t getFieldValue(const BitFieldInfo FieldInfo) const {
return getMask(FieldInfo) & (RawData >> FieldInfo[1]);
}
void init(bool IsPointer, bool IsVector, uint16_t NumElements,
unsigned SizeInBits, unsigned AddressSpace) {
this->IsPointer = IsPointer;
this->IsVector = IsVector;
if (!IsVector) {
if (!IsPointer)
RawData = maskAndShift(SizeInBits, ScalarSizeFieldInfo);
else
RawData = maskAndShift(SizeInBits, PointerSizeFieldInfo) |
maskAndShift(AddressSpace, PointerAddressSpaceFieldInfo);
} else {
assert(NumElements > 1 && "invalid number of vector elements");
if (!IsPointer)
RawData = maskAndShift(NumElements, VectorElementsFieldInfo) |
maskAndShift(SizeInBits, VectorSizeFieldInfo);
else
RawData =
maskAndShift(NumElements, PointerVectorElementsFieldInfo) |
maskAndShift(SizeInBits, PointerVectorSizeFieldInfo) |
maskAndShift(AddressSpace, PointerVectorAddressSpaceFieldInfo);
}
}
};
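// A minimal sketch of the repacked representation in use, assuming only the
// factory functions above:
//   LLT S32 = LLT::scalar(32);       // non-pointer scalar
//   LLT P0 = LLT::pointer(0, 64);    // 64-bit pointer in address space 0
//   LLT V4S32 = LLT::vector(4, S32); // <4 x s32>
// V4S32.getSizeInBits() == 128 and V4S32.getElementType() == S32; vectors of
// pointers such as LLT::vector(2, P0) are now also representable.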
inline raw_ostream& operator<<(raw_ostream &OS, const LLT &Ty) {
@ -182,14 +285,18 @@ inline raw_ostream& operator<<(raw_ostream &OS, const LLT &Ty) {
template<> struct DenseMapInfo<LLT> {
static inline LLT getEmptyKey() {
return LLT{LLT::Invalid, 0, -1u};
LLT Invalid;
Invalid.IsPointer = true;
return Invalid;
}
static inline LLT getTombstoneKey() {
return LLT{LLT::Invalid, 0, -2u};
LLT Invalid;
Invalid.IsVector = true;
return Invalid;
}
static inline unsigned getHashValue(const LLT &Ty) {
uint64_t Val = ((uint64_t)Ty.SizeInBits << 32) |
((uint64_t)Ty.ElementsOrAddrSpace << 16) | (uint64_t)Ty.Kind;
uint64_t Val = ((uint64_t)Ty.RawData) << 2 | ((uint64_t)Ty.IsPointer) << 1 |
((uint64_t)Ty.IsVector);
return DenseMapInfo<uint64_t>::getHashValue(Val);
}
static bool isEqual(const LLT &LHS, const LLT &RHS) {

View File

@ -18,6 +18,7 @@
#include "llvm/Support/SwapByteOrder.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstring>
#include <type_traits>
#include <limits>
@ -198,6 +199,21 @@ template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
return countTrailingZeros(Val, ZB_Undefined);
}
/// \brief Create a bitmask with the N right-most bits set to 1, and all other
/// bits set to 0. Only unsigned types are allowed.
template <typename T> T maskTrailingOnes(unsigned N) {
static_assert(std::is_unsigned<T>::value, "Invalid type!");
const unsigned Bits = CHAR_BIT * sizeof(T);
assert(N <= Bits && "Invalid bit index");
return N == 0 ? 0 : (T(-1) >> (Bits - N));
}
/// \brief Create a bitmask with the N left-most bits set to 1, and all other
/// bits set to 0. Only unsigned types are allowed.
template <typename T> T maskLeadingOnes(unsigned N) {
return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
}
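// Worked values for the two helpers (a sketch, masks written in binary):
//   maskTrailingOnes<uint8_t>(3) == 0b00000111
//   maskLeadingOnes<uint8_t>(3)  == 0b11100000
// The N == 0 and N == CHAR_BIT * sizeof(T) edge cases avoid shifting by the
// full bit width, which would otherwise be undefined behavior.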
/// \brief Get the index of the last set bit starting from the least
/// significant bit.
///

View File

@ -42,13 +42,16 @@ class Recycler {
FreeNode *pop_val() {
auto *Val = FreeList;
__asan_unpoison_memory_region(Val, Size);
FreeList = FreeList->Next;
__msan_allocated_memory(Val, Size);
return Val;
}
void push(FreeNode *N) {
N->Next = FreeList;
FreeList = N;
__asan_poison_memory_region(N, Size);
}
public:

View File

@ -57,7 +57,7 @@ namespace llvm {
/// isValid - returns the error encountered during regex compilation, or
/// matching, if any.
bool isValid(std::string &Error);
bool isValid(std::string &Error) const;
/// getNumMatches - In a valid regex, return the number of parenthesized
/// matches it contains. The number filled in by match will include this

View File

@ -75,7 +75,7 @@ enum ArchExtKind : unsigned {
AEK_CRC = 0x2,
AEK_CRYPTO = 0x4,
AEK_FP = 0x8,
AEK_HWDIV = 0x10,
AEK_HWDIVTHUMB = 0x10,
AEK_HWDIVARM = 0x20,
AEK_MP = 0x40,
AEK_SIMD = 0x80,

View File

@ -27,6 +27,8 @@ class StringToOffsetTable {
std::string AggregateString;
public:
bool Empty() const { return StringOffset.empty(); }
unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
auto IterBool =
StringOffset.insert(std::make_pair(Str, AggregateString.size()));

View File

@ -230,6 +230,12 @@ public:
return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
}
/// Return the type for frame index, which is determined by
/// the alloca address space specified through the data layout.
MVT getFrameIndexTy(const DataLayout &DL) const {
return getPointerTy(DL, DL.getAllocaAddrSpace());
}
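// A sketch of the intended call-site change (surrounding names assumed):
// targets that previously hard-coded getPointerTy(DL) for frame indices can
// instead use:
//   MVT FrameTy = TLI.getFrameIndexTy(DAG.getDataLayout());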
/// EVT is not used in-tree, but is used by out-of-tree targets.
/// Documentation for this function would be nice...
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
@ -2807,7 +2813,7 @@ public:
/// Return true if the target may be able emit the call instruction as a tail
/// call. This is used by optimization passes to determine if it's profitable
/// to duplicate return instructions to enable tailcall optimization.
virtual bool mayBeEmittedAsTailCall(CallInst *) const {
virtual bool mayBeEmittedAsTailCall(const CallInst *) const {
return false;
}

View File

@ -65,14 +65,6 @@ template <typename T> class ArrayRef;
/// Blocks containing EHPads, allocas, invokes, or vastarts are not valid.
static bool isBlockValidForExtraction(const BasicBlock &BB);
/// \brief Create a code extractor for a single basic block.
///
/// In this formation, we don't require a dominator tree. The given basic
/// block is set up for extraction.
CodeExtractor(BasicBlock *BB, bool AggregateArgs = false,
BlockFrequencyInfo *BFI = nullptr,
BranchProbabilityInfo *BPI = nullptr);
/// \brief Create a code extractor for a sequence of blocks.
///
/// Given a sequence of basic blocks where the first block in the sequence
@ -91,14 +83,6 @@ template <typename T> class ArrayRef;
BlockFrequencyInfo *BFI = nullptr,
BranchProbabilityInfo *BPI = nullptr);
/// \brief Create a code extractor for a region node.
///
/// Behaves just like the generic code sequence constructor, but uses the
/// block sequence of the region node passed in.
CodeExtractor(DominatorTree &DT, const RegionNode &RN,
bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
BranchProbabilityInfo *BPI = nullptr);
/// \brief Perform the extraction, returning the new function.
///
/// Returns zero when called on a CodeExtractor instance where isEligible

View File

@ -59,6 +59,7 @@ struct YAMLXRaySledEntry {
yaml::Hex64 Function;
SledEntry::FunctionKinds Kind;
bool AlwaysInstrument;
std::string FunctionName;
};
/// The InstrumentationMap represents the computed function id's and indicated
@ -115,6 +116,7 @@ template <> struct MappingTraits<xray::YAMLXRaySledEntry> {
IO.mapRequired("function", Entry.Function);
IO.mapRequired("kind", Entry.Kind);
IO.mapRequired("always-instrument", Entry.AlwaysInstrument);
IO.mapOptional("function-name", Entry.FunctionName);
}
static constexpr bool flow = true;

View File

@ -808,7 +808,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// well. Or alternatively, replace all of this with inaccessiblememonly once
// that's implemented fully.
auto *Inst = CS.getInstruction();
if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI)) {
if (isMallocOrCallocLikeFn(Inst, &TLI)) {
// Be conservative if the accessed pointer may alias the allocation -
// fallback to the generic handling below.
if (getBestAAResults().alias(MemoryLocation(Inst), Loc) == NoAlias)
@ -925,9 +925,8 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
const DataLayout &DL) {
assert(GEP1->getPointerOperand()->stripPointerCasts() ==
GEP2->getPointerOperand()->stripPointerCasts() &&
GEP1->getPointerOperand()->getType() ==
GEP2->getPointerOperand()->getType() &&
GEP2->getPointerOperand()->stripPointerCasts() &&
GEP1->getPointerOperandType() == GEP2->getPointerOperandType() &&
"Expected GEPs with the same pointer operand");
// Try to determine whether GEP1 and GEP2 index through arrays, into structs,
@ -1186,9 +1185,8 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// just the same underlying object), see if that tells us anything about
// the resulting pointers.
if (GEP1->getPointerOperand()->stripPointerCasts() ==
GEP2->getPointerOperand()->stripPointerCasts() &&
GEP1->getPointerOperand()->getType() ==
GEP2->getPointerOperand()->getType()) {
GEP2->getPointerOperand()->stripPointerCasts() &&
GEP1->getPointerOperandType() == GEP2->getPointerOperandType()) {
AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);
// If we couldn't find anything interesting, don't abandon just yet.
if (R != MayAlias)

View File

@ -72,6 +72,32 @@ static const uint32_t UR_TAKEN_WEIGHT = 1;
/// easily subsume it.
static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
/// \brief Returns the branch probability for an unreachable edge according to
/// the heuristic.
///
/// This is the probability of a branch being taken to a block that terminates
/// (eventually) in unreachable. Such branches are predicted as unlikely as possible.
static BranchProbability getUnreachableProbability(uint64_t UnreachableCount) {
assert(UnreachableCount > 0 && "UnreachableCount must be > 0");
return BranchProbability::getBranchProbability(
UR_TAKEN_WEIGHT,
(UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * UnreachableCount);
}
/// \brief Returns the branch probability for a reachable edge according to
/// the heuristic.
///
/// This is the probability of a branch not being taken toward a block that
/// terminates (eventually) in unreachable. The branch toward unreachable is
/// essentially never taken, so the weight is set to an absurdly high value to
/// keep nested loops from easily subsuming it.
static BranchProbability getReachableProbability(uint64_t ReachableCount) {
assert(ReachableCount > 0 && "ReachableCount must be > 0");
return BranchProbability::getBranchProbability(
UR_NONTAKEN_WEIGHT,
(UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * ReachableCount);
}
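// Worked arithmetic (follows from the weights above): with one unreachable
// and one reachable successor,
//   getUnreachableProbability(1) == 1 / (1 + (1024*1024 - 1)) == 1 / 2^20
//   getReachableProbability(1)   == (2^20 - 1) / 2^20
// so the unreachable edge is predicted taken about once per million branches.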
/// \brief Weight for a branch taken going into a cold block.
///
/// This is the weight for a branch taken toward a block marked
@ -179,7 +205,11 @@ BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
/// unreachable-terminated block as extremely unlikely.
bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 0)
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
// Return false here so that edge weights for InvokeInst could be decided
// in calcInvokeHeuristics().
if (isa<InvokeInst>(TI))
return false;
SmallVector<unsigned, 4> UnreachableEdges;
@ -191,14 +221,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
else
ReachableEdges.push_back(I.getSuccessorIndex());
// Skip probabilities if this block has a single successor or if all were
// reachable.
if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())
return false;
// Return false here so that edge weights for InvokeInst could be decided
// in calcInvokeHeuristics().
if (isa<InvokeInst>(TI))
// Skip probabilities if all were reachable.
if (UnreachableEdges.empty())
return false;
if (ReachableEdges.empty()) {
@ -208,12 +232,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
return true;
}
auto UnreachableProb = BranchProbability::getBranchProbability(
UR_TAKEN_WEIGHT, (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
uint64_t(UnreachableEdges.size()));
auto ReachableProb = BranchProbability::getBranchProbability(
UR_NONTAKEN_WEIGHT,
(UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * uint64_t(ReachableEdges.size()));
auto UnreachableProb = getUnreachableProbability(UnreachableEdges.size());
auto ReachableProb = getReachableProbability(ReachableEdges.size());
for (unsigned SuccIdx : UnreachableEdges)
setEdgeProbability(BB, SuccIdx, UnreachableProb);
@ -224,11 +244,12 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
}
// Propagate existing explicit probabilities from either profile data or
// 'expect' intrinsic processing.
// 'expect' intrinsic processing. Examine the metadata against the unreachable
// heuristic; the probability of an edge leading to an unreachable block is
// set to the minimum of the metadata weight and the unreachable heuristic.
bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 1)
return false;
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
return false;
@ -249,6 +270,8 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
// be scaled to fit in 32 bits.
uint64_t WeightSum = 0;
SmallVector<uint32_t, 2> Weights;
SmallVector<unsigned, 2> UnreachableIdxs;
SmallVector<unsigned, 2> ReachableIdxs;
Weights.reserve(TI->getNumSuccessors());
for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
ConstantInt *Weight =
@ -259,6 +282,10 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
"Too many bits for uint32_t");
Weights.push_back(Weight->getZExtValue());
WeightSum += Weights.back();
if (PostDominatedByUnreachable.count(TI->getSuccessor(i - 1)))
UnreachableIdxs.push_back(i - 1);
else
ReachableIdxs.push_back(i - 1);
}
assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
@ -267,20 +294,52 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
uint64_t ScalingFactor =
(WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
WeightSum = 0;
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
Weights[i] /= ScalingFactor;
WeightSum += Weights[i];
if (ScalingFactor > 1) {
WeightSum = 0;
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
Weights[i] /= ScalingFactor;
WeightSum += Weights[i];
}
}
if (WeightSum == 0) {
if (WeightSum == 0 || ReachableIdxs.size() == 0) {
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
setEdgeProbability(BB, i, {1, e});
} else {
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
setEdgeProbability(BB, i, {Weights[i], static_cast<uint32_t>(WeightSum)});
Weights[i] = 1;
WeightSum = TI->getNumSuccessors();
}
// Set the probability.
SmallVector<BranchProbability, 2> BP;
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
BP.push_back({ Weights[i], static_cast<uint32_t>(WeightSum) });
// Examine the metadata against the unreachable heuristic.
// If the unreachable heuristic is stronger, use it for this edge.
if (UnreachableIdxs.size() > 0 && ReachableIdxs.size() > 0) {
auto ToDistribute = BranchProbability::getZero();
auto UnreachableProb = getUnreachableProbability(UnreachableIdxs.size());
for (auto i : UnreachableIdxs)
if (UnreachableProb < BP[i]) {
ToDistribute += BP[i] - UnreachableProb;
BP[i] = UnreachableProb;
}
// If we modified the probability of some edges, then we must distribute
// the difference among the reachable blocks.
if (ToDistribute > BranchProbability::getZero()) {
BranchProbability PerEdge = ToDistribute / ReachableIdxs.size();
for (auto i : ReachableIdxs) {
BP[i] += PerEdge;
ToDistribute -= PerEdge;
}
// Tail goes to the first reachable edge.
BP[ReachableIdxs[0]] += ToDistribute;
}
}
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
setEdgeProbability(BB, i, BP[i]);
assert(WeightSum <= UINT32_MAX &&
"Expected weights to scale down to 32 bits");
@ -297,7 +356,11 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
/// Return false, otherwise.
bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 0)
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
// Return false here so that edge weights for InvokeInst could be decided
// in calcInvokeHeuristics().
if (isa<InvokeInst>(TI))
return false;
// Determine which successors are post-dominated by a cold block.
@ -309,13 +372,8 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
else
NormalEdges.push_back(I.getSuccessorIndex());
// Return false here so that edge weights for InvokeInst could be decided
// in calcInvokeHeuristics().
if (isa<InvokeInst>(TI))
return false;
// Skip probabilities if this block has a single successor.
if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
// Skip probabilities if no cold edges.
if (ColdEdges.empty())
return false;
if (NormalEdges.empty()) {
@ -698,10 +756,13 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) {
DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
updatePostDominatedByUnreachable(BB);
updatePostDominatedByColdCall(BB);
if (calcUnreachableHeuristics(BB))
// If there are not at least two successors, it makes no sense to set probabilities.
if (BB->getTerminator()->getNumSuccessors() < 2)
continue;
if (calcMetadataWeights(BB))
continue;
if (calcUnreachableHeuristics(BB))
continue;
if (calcColdCallHeuristics(BB))
continue;
if (calcLoopBranchHeuristics(BB, LI))


@ -400,8 +400,7 @@ template <typename CFLAA> class CFLGraphBuilder {
// TODO: address other common library functions such as realloc(),
// strdup(),
// etc.
if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) ||
isFreeCall(Inst, &TLI))
if (isMallocOrCallocLikeFn(Inst, &TLI) || isFreeCall(Inst, &TLI))
return;
// TODO: Add support for noalias args/all the other fun function


@ -75,20 +75,16 @@ static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyCastInst(unsigned, Value *, Type *,
const Query &, unsigned);
/// For a boolean type, or a vector of boolean type, return false, or
/// a vector with every element false, as appropriate for the type.
/// For a boolean type or a vector of boolean type, return false or a vector
/// with every element false.
static Constant *getFalse(Type *Ty) {
assert(Ty->getScalarType()->isIntegerTy(1) &&
"Expected i1 type or a vector of i1!");
return Constant::getNullValue(Ty);
return ConstantInt::getFalse(Ty);
}
/// For a boolean type, or a vector of boolean type, return true, or
/// a vector with every element true, as appropriate for the type.
/// For a boolean type or a vector of boolean type, return true or a vector
/// with every element true.
static Constant *getTrue(Type *Ty) {
assert(Ty->getScalarType()->isIntegerTy(1) &&
"Expected i1 type or a vector of i1!");
return Constant::getAllOnesValue(Ty);
return ConstantInt::getTrue(Ty);
}
/// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"?
@ -572,11 +568,11 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
match(Op1, m_Not(m_Specific(Op0))))
return Constant::getAllOnesValue(Ty);
// add nsw/nuw (xor Y, signbit), signbit --> Y
// add nsw/nuw (xor Y, signmask), signmask --> Y
// The no-wrapping add guarantees that the top bit will be set by the add.
// Therefore, the xor must be clearing the already set sign bit of Y.
if ((isNSW || isNUW) && match(Op1, m_SignBit()) &&
match(Op0, m_Xor(m_Value(Y), m_SignBit())))
if ((isNSW || isNUW) && match(Op1, m_SignMask()) &&
match(Op0, m_Xor(m_Value(Y), m_SignMask())))
return Y;
/// i1 add -> xor.
@ -1085,7 +1081,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (!isSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) &&
match(Op1, m_ConstantInt(C2))) {
bool Overflow;
C1->getValue().umul_ov(C2->getValue(), Overflow);
(void)C1->getValue().umul_ov(C2->getValue(), Overflow);
if (Overflow)
return Constant::getNullValue(Op0->getType());
}
@ -2823,7 +2819,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return ConstantInt::getTrue(RHS->getContext());
}
}
if (CIVal->isSignBit() && *CI2Val == 1) {
if (CIVal->isSignMask() && *CI2Val == 1) {
if (Pred == ICmpInst::ICMP_UGT)
return ConstantInt::getFalse(RHS->getContext());
if (Pred == ICmpInst::ICMP_ULE)
@ -3800,6 +3796,8 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
Type *GEPTy = PointerType::get(LastType, AS);
if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
GEPTy = VectorType::get(GEPTy, VT->getNumElements());
else if (VectorType *VT = dyn_cast<VectorType>(Ops[1]->getType()))
GEPTy = VectorType::get(GEPTy, VT->getNumElements());
if (isa<UndefValue>(Ops[0]))
return UndefValue::get(GEPTy);
@ -4082,6 +4080,60 @@ Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
RecursionLimit);
}
/// For the given destination element of a shuffle, peek through shuffles to
/// match a root vector source operand that contains that element in the same
/// vector lane (i.e., the same mask index), so we can eliminate the shuffle(s).
static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
Constant *Mask, Value *RootVec, int RootElt,
unsigned MaxRecurse) {
if (!MaxRecurse--)
return nullptr;
// Bail out if any mask value is undefined. That kind of shuffle may be
// simplified further based on demanded bits or other folds.
int MaskVal = ShuffleVectorInst::getMaskValue(Mask, RootElt);
if (MaskVal == -1)
return nullptr;
// The mask value chooses which source operand we need to look at next.
Value *SourceOp;
int InVecNumElts = Op0->getType()->getVectorNumElements();
if (MaskVal < InVecNumElts) {
RootElt = MaskVal;
SourceOp = Op0;
} else {
RootElt = MaskVal - InVecNumElts;
SourceOp = Op1;
}
// If the source operand is a shuffle itself, look through it to find the
// matching root vector.
if (auto *SourceShuf = dyn_cast<ShuffleVectorInst>(SourceOp)) {
return foldIdentityShuffles(
DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1),
SourceShuf->getMask(), RootVec, RootElt, MaxRecurse);
}
// TODO: Look through bitcasts? What if the bitcast changes the vector element
// size?
// The source operand is not a shuffle. Initialize the root vector value for
// this shuffle if that has not been done yet.
if (!RootVec)
RootVec = SourceOp;
// Give up as soon as a source operand does not match the existing root value.
if (RootVec != SourceOp)
return nullptr;
// The element must be coming from the same lane in the source vector
// (although it may have crossed lanes in intermediate shuffles).
if (RootElt != DestElt)
return nullptr;
return RootVec;
}
static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
Type *RetTy, const Query &Q,
unsigned MaxRecurse) {
@ -4126,7 +4178,28 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
OpShuf->getMask()->getSplatValue())
return Op1;
return nullptr;
// Don't fold a shuffle with undef mask elements. This may get folded in a
// better way using demanded bits or other analysis.
// TODO: Should we allow this?
for (unsigned i = 0; i != MaskNumElts; ++i)
if (ShuffleVectorInst::getMaskValue(Mask, i) == -1)
return nullptr;
// Check if every element of this shuffle can be mapped back to the
// corresponding element of a single root vector. If so, we don't need this
// shuffle. This handles simple identity shuffles as well as chains of
// shuffles that may widen/narrow and/or move elements across lanes and back.
Value *RootVec = nullptr;
for (unsigned i = 0; i != MaskNumElts; ++i) {
// Note that recursion is limited for each vector element, so if any element
// exceeds the limit, this will fail to simplify.
RootVec = foldIdentityShuffles(i, Op0, Op1, Mask, RootVec, i, MaxRecurse);
// We can't replace a widening/narrowing shuffle with one of its operands.
if (!RootVec || RootVec->getType() != RetTy)
return nullptr;
}
return RootVec;
}
/// Given operands for a ShuffleVectorInst, fold the result or return null.
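// A toy model of the lane tracing done by foldIdentityShuffles, assuming a
// hypothetical Vec type in place of LLVM values. Two back-to-back reversals
// trace every lane back to the same lane of the original vector:
#include <cstdio>
#include <vector>

struct Vec {
  const Vec *Op0 = nullptr, *Op1 = nullptr; // null operands mean a leaf
  std::vector<int> Mask;                    // shuffle mask, -1 for undef
  int NumElts = 0;
};

// Follow lane RootElt through a chain of shuffles; return the leaf vector
// if destination lane DestElt ultimately comes from lane DestElt of a
// single leaf.
static const Vec *traceLane(const Vec *V, int RootElt, int DestElt) {
  while (V->Op0) { // still a shuffle
    int M = V->Mask[RootElt];
    if (M < 0)
      return nullptr; // undef mask element: give up
    int InElts = V->Op0->NumElts;
    RootElt = M < InElts ? M : M - InElts;
    V = M < InElts ? V->Op0 : V->Op1;
  }
  return RootElt == DestElt ? V : nullptr;
}

int main() {
  Vec A{nullptr, nullptr, {}, 4}, B{nullptr, nullptr, {}, 4};
  Vec Rev{&A, &B, {3, 2, 1, 0}, 4};    // reverse A
  Vec Back{&Rev, &B, {3, 2, 1, 0}, 4}; // reverse again
  const Vec *Root = nullptr;
  for (int i = 0; i < 4; ++i) {
    const Vec *R = traceLane(&Back, i, i);
    if (!R || (Root && R != Root)) {
      Root = nullptr;
      break;
    }
    Root = R;
  }
  std::printf("chain is identity of A: %s\n", Root == &A ? "yes" : "no");
  return 0;
}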


@ -37,6 +37,7 @@ enum AllocType : uint8_t {
CallocLike = 1<<2, // allocates + bzero
ReallocLike = 1<<3, // reallocates
StrDupLike = 1<<4,
MallocOrCallocLike = MallocLike | CallocLike,
AllocLike = MallocLike | CallocLike | StrDupLike,
AnyAlloc = AllocLike | ReallocLike
};
@ -77,8 +78,8 @@ static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
// TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
static Function *getCalledFunction(const Value *V, bool LookThroughBitCast,
bool &IsNoBuiltin) {
static const Function *getCalledFunction(const Value *V, bool LookThroughBitCast,
bool &IsNoBuiltin) {
// Don't care about intrinsics in this case.
if (isa<IntrinsicInst>(V))
return nullptr;
@ -86,13 +87,13 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast,
if (LookThroughBitCast)
V = V->stripPointerCasts();
CallSite CS(const_cast<Value*>(V));
ImmutableCallSite CS(V);
if (!CS.getInstruction())
return nullptr;
IsNoBuiltin = CS.isNoBuiltin();
Function *Callee = CS.getCalledFunction();
const Function *Callee = CS.getCalledFunction();
if (!Callee || !Callee->isDeclaration())
return nullptr;
return Callee;
@ -219,6 +220,14 @@ bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
return getAllocationData(V, CallocLike, TLI, LookThroughBitCast).hasValue();
}
/// \brief Tests if a value is a call or invoke to a library function that
/// allocates memory similar to malloc or calloc.
bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
return getAllocationData(V, MallocOrCallocLike, TLI,
LookThroughBitCast).hasValue();
}
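// The classification itself is just a bitmask test; a self-contained sketch
// of the AllocType scheme added above:
#include <cstdio>

enum AllocType : unsigned char {
  MallocLike = 1 << 1, // allocates
  CallocLike = 1 << 2, // allocates + bzero
  StrDupLike = 1 << 4,
  MallocOrCallocLike = MallocLike | CallocLike,
};

// A call classified with one primitive bit matches any composite category
// whose mask includes that bit.
static bool matches(AllocType Got, AllocType Want) { return Got & Want; }

int main() {
  std::printf("calloc-like matches: %d\n",
              matches(CallocLike, MallocOrCallocLike)); // 1
  std::printf("strdup-like matches: %d\n",
              matches(StrDupLike, MallocOrCallocLike)); // 0
  return 0;
}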
/// \brief Tests if a value is a call or invoke to a library function that
/// allocates memory (either malloc, calloc, or strdup like).
bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,


@ -1291,7 +1291,6 @@ void MemorySSA::buildMemorySSA() {
// could just look up the memory access for every possible instruction in the
// stream.
SmallPtrSet<BasicBlock *, 32> DefiningBlocks;
SmallPtrSet<BasicBlock *, 32> DefUseBlocks;
// Go through each block, figure out where defs occur, and chain together all
// the accesses.
for (BasicBlock &B : F) {
@ -1316,8 +1315,6 @@ void MemorySSA::buildMemorySSA() {
}
if (InsertIntoDef)
DefiningBlocks.insert(&B);
if (Accesses)
DefUseBlocks.insert(&B);
}
placePHINodes(DefiningBlocks, BBNumbers);


@ -1093,7 +1093,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
APInt Mult(W, i);
unsigned TwoFactors = Mult.countTrailingZeros();
T += TwoFactors;
Mult = Mult.lshr(TwoFactors);
Mult.lshrInPlace(TwoFactors);
OddFactorial *= Mult;
}
@ -1276,7 +1276,8 @@ static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
namespace {
struct ExtendOpTraitsBase {
typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(
const SCEV *, Type *, ScalarEvolution::ExtendCacheTy &Cache);
};
// Used to make code generic over signed and unsigned overflow.
@ -1305,8 +1306,9 @@ struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
}
};
const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
const ExtendOpTraitsBase::GetExtendExprTy
ExtendOpTraits<SCEVSignExtendExpr>::GetExtendExpr =
&ScalarEvolution::getSignExtendExprCached;
template <>
struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
@ -1321,8 +1323,9 @@ struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
}
};
const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
const ExtendOpTraitsBase::GetExtendExprTy
ExtendOpTraits<SCEVZeroExtendExpr>::GetExtendExpr =
&ScalarEvolution::getZeroExtendExprCached;
}
// The recurrence AR has been shown to have no signed/unsigned wrap or something
@ -1334,7 +1337,8 @@ const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
// "sext/zext(PostIncAR)"
template <typename ExtendOpTy>
static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
ScalarEvolution *SE) {
ScalarEvolution *SE,
ScalarEvolution::ExtendCacheTy &Cache) {
auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
@ -1381,9 +1385,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
const SCEV *OperandExtendedStart =
SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
(SE->*GetExtendExpr)(Step, WideTy));
if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Cache),
(SE->*GetExtendExpr)(Step, WideTy, Cache));
if ((SE->*GetExtendExpr)(Start, WideTy, Cache) == OperandExtendedStart) {
if (PreAR && AR->getNoWrapFlags(WrapType)) {
// If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
// or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
@ -1408,15 +1412,17 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
// Get the normalized zero or sign extended expression for this AddRec's Start.
template <typename ExtendOpTy>
static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
ScalarEvolution *SE) {
ScalarEvolution *SE,
ScalarEvolution::ExtendCacheTy &Cache) {
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE);
const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Cache);
if (!PreStart)
return (SE->*GetExtendExpr)(AR->getStart(), Ty);
return (SE->*GetExtendExpr)(AR->getStart(), Ty, Cache);
return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty),
(SE->*GetExtendExpr)(PreStart, Ty));
return SE->getAddExpr(
(SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty, Cache),
(SE->*GetExtendExpr)(PreStart, Ty, Cache));
}
// Try to prove away overflow by looking at "nearby" add recurrences. A
@ -1496,8 +1502,31 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
return false;
}
const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
Type *Ty) {
const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty) {
// Use the local cache to prevent exponential behavior of
// getZeroExtendExprImpl.
ExtendCacheTy Cache;
return getZeroExtendExprCached(Op, Ty, Cache);
}
/// Query \p Cache before calling getZeroExtendExprImpl. If there is no
/// related entry in the \p Cache, call getZeroExtendExprImpl and save
/// the result in the \p Cache.
const SCEV *ScalarEvolution::getZeroExtendExprCached(const SCEV *Op, Type *Ty,
ExtendCacheTy &Cache) {
auto It = Cache.find({Op, Ty});
if (It != Cache.end())
return It->second;
const SCEV *ZExt = getZeroExtendExprImpl(Op, Ty, Cache);
auto InsertResult = Cache.insert({{Op, Ty}, ZExt});
assert(InsertResult.second && "Expect the key was not in the cache");
(void)InsertResult;
return ZExt;
}
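// Why the cache matters: extending a SCEV walks a DAG whose subexpressions
// are shared, so an uncached recursion revisits shared nodes once per path.
// A self-contained sketch with a chain of shared children, assuming
// std::map in place of the reader's cache type:
#include <cstdio>
#include <map>

static long visits = 0;

// Node i has the same child (i + 1) on both sides, like "x + x" nested.
static long walk(int node, int depth, std::map<int, long> *cache) {
  ++visits;
  if (depth == 0)
    return 1;
  if (cache) {
    auto it = cache->find(node);
    if (it != cache->end())
      return it->second;
  }
  long r = walk(node + 1, depth - 1, cache) + walk(node + 1, depth - 1, cache);
  if (cache)
    cache->emplace(node, r);
  return r;
}

int main() {
  walk(0, 20, nullptr);
  std::printf("uncached: %ld visits\n", visits); // 2^21 - 1
  visits = 0;
  std::map<int, long> cache;
  walk(0, 20, &cache);
  std::printf("cached:   %ld visits\n", visits); // 41
  return 0;
}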
/// The real implementation of getZeroExtendExpr.
const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
ExtendCacheTy &Cache) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@ -1507,11 +1536,11 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
// zext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getZeroExtendExpr(SZ->getOperand(), Ty);
return getZeroExtendExprCached(SZ->getOperand(), Ty, Cache);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
@ -1555,8 +1584,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// we don't need to do any further analysis.
if (AR->hasNoUnsignedWrap())
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags());
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@ -1581,21 +1610,22 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
const SCEV *ZAdd =
getZeroExtendExprCached(getAddExpr(Start, ZMul), WideTy, Cache);
const SCEV *WideStart = getZeroExtendExprCached(Start, WideTy, Cache);
const SCEV *WideMaxBECount =
getZeroExtendExpr(CastedMaxBECount, WideTy);
const SCEV *OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getZeroExtendExpr(Step, WideTy)));
getZeroExtendExprCached(CastedMaxBECount, WideTy, Cache);
const SCEV *OperandExtendedAdd = getAddExpr(
WideStart, getMulExpr(WideMaxBECount, getZeroExtendExprCached(
Step, WideTy, Cache)));
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NUW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
getZeroExtendExprCached(Step, Ty, Cache), L,
AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
@ -1609,7 +1639,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
@ -1641,8 +1671,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
getZeroExtendExprCached(Step, Ty, Cache), L,
AR->getNoWrapFlags());
}
} else if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
@ -1657,7 +1688,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
@ -1666,8 +1697,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Cache),
getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags());
}
}
@ -1678,7 +1709,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// commute the zero extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
Ops.push_back(getZeroExtendExpr(Op, Ty));
Ops.push_back(getZeroExtendExprCached(Op, Ty, Cache));
return getAddExpr(Ops, SCEV::FlagNUW);
}
}
@ -1692,8 +1723,31 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
return S;
}
const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
Type *Ty) {
const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty) {
// Use the local cache to prevent exponential behavior of
// getSignExtendExprImpl.
ExtendCacheTy Cache;
return getSignExtendExprCached(Op, Ty, Cache);
}
/// Query \p Cache before calling getSignExtendExprImpl. If there is no
/// related entry in the \p Cache, call getSignExtendExprImpl and save
/// the result in the \p Cache.
const SCEV *ScalarEvolution::getSignExtendExprCached(const SCEV *Op, Type *Ty,
ExtendCacheTy &Cache) {
auto It = Cache.find({Op, Ty});
if (It != Cache.end())
return It->second;
const SCEV *SExt = getSignExtendExprImpl(Op, Ty, Cache);
auto InsertResult = Cache.insert({{Op, Ty}, SExt});
assert(InsertResult.second && "Expect the key was not in the cache");
(void)InsertResult;
return SExt;
}
/// The real implementation of getSignExtendExpr.
const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
ExtendCacheTy &Cache) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@ -1703,11 +1757,11 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
// sext(sext(x)) --> sext(x)
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
return getSignExtendExpr(SS->getOperand(), Ty);
return getSignExtendExprCached(SS->getOperand(), Ty, Cache);
// sext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
@ -1746,8 +1800,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
C2.ugt(C1) && C2.isPowerOf2())
return getAddExpr(getSignExtendExpr(SC1, Ty),
getSignExtendExpr(SMul, Ty));
return getAddExpr(getSignExtendExprCached(SC1, Ty, Cache),
getSignExtendExprCached(SMul, Ty, Cache));
}
}
}
@ -1758,7 +1812,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// commute the sign extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
Ops.push_back(getSignExtendExpr(Op, Ty));
Ops.push_back(getSignExtendExprCached(Op, Ty, Cache));
return getAddExpr(Ops, SCEV::FlagNSW);
}
}
@ -1782,8 +1836,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// we don't need to do any further analysis.
if (AR->hasNoSignedWrap())
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
getSignExtendExprCached(Step, Ty, Cache), L, SCEV::FlagNSW);
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@ -1808,21 +1862,22 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
const SCEV *SAdd =
getSignExtendExprCached(getAddExpr(Start, SMul), WideTy, Cache);
const SCEV *WideStart = getSignExtendExprCached(Start, WideTy, Cache);
const SCEV *WideMaxBECount =
getZeroExtendExpr(CastedMaxBECount, WideTy);
const SCEV *OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getSignExtendExpr(Step, WideTy)));
getZeroExtendExpr(CastedMaxBECount, WideTy);
const SCEV *OperandExtendedAdd = getAddExpr(
WideStart, getMulExpr(WideMaxBECount, getSignExtendExprCached(
Step, WideTy, Cache)));
if (SAdd == OperandExtendedAdd) {
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
getSignExtendExprCached(Step, Ty, Cache), L,
AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as unsigned.
// This covers loops that count up with an unsigned step.
@ -1843,7 +1898,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
@ -1875,8 +1930,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
getSignExtendExprCached(Step, Ty, Cache), L,
AR->getNoWrapFlags());
}
}
@ -1890,18 +1946,18 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
C2.isPowerOf2()) {
Start = getSignExtendExpr(Start, Ty);
Start = getSignExtendExprCached(Start, Ty, Cache);
const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L,
AR->getNoWrapFlags());
return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
return getAddExpr(Start, getSignExtendExprCached(NewAR, Ty, Cache));
}
}
if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Cache),
getSignExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags());
}
}
@ -3951,9 +4007,9 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
case Instruction::Xor:
if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1)))
// If the RHS of the xor is a signbit, then this is just an add.
// Instcombine turns add of signbit into xor as a strength reduction step.
if (RHSC->getValue().isSignBit())
// If the RHS of the xor is a signmask, then this is just an add.
// Instcombine turns add of signmask into xor as a strength reduction step.
if (RHSC->getValue().isSignMask())
return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
return BinaryOp(Op);
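// The equivalence behind this match, checked exhaustively for i8: adding
// the sign mask can only flip the top bit (any carry falls off the end),
// which is exactly what the xor does.
#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    if (uint8_t(x ^ 0x80u) != uint8_t(x + 0x80u)) {
      std::printf("mismatch at %u\n", x);
      return 1;
    }
  std::printf("x ^ 0x80 == x + 0x80 (mod 256) for every 8-bit x\n");
  return 0;
}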
@ -5272,28 +5328,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
break;
case Instruction::Or:
// If the RHS of the Or is a constant, we may have something like:
// X*4+1 which got turned into X*4|1. Handle this as an Add so loop
// optimizations will transparently handle this case.
//
// In order for this transformation to be safe, the LHS must be of the
// form X*(2^n) and the Or constant must be less than 2^n.
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
const SCEV *LHS = getSCEV(BO->LHS);
const APInt &CIVal = CI->getValue();
if (GetMinTrailingZeros(LHS) >=
(CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
// Build a plain add SCEV.
const SCEV *S = getAddExpr(LHS, getSCEV(CI));
// If the LHS of the add was an addrec and it has no-wrap flags,
// transfer the no-wrap flags, since an or won't introduce a wrap.
if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
OldAR->getNoWrapFlags());
}
return S;
}
// Use ValueTracking to check whether this is actually an add.
if (haveNoCommonBitsSet(BO->LHS, BO->RHS, getDataLayout(), &AC,
nullptr, &DT)) {
// There aren't any common bits set, so the add can't wrap.
auto Flags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNSW);
return getAddExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags);
}
break;
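// The fact this relies on, checked exhaustively for i8 operands: with no
// common bits set, the addition produces no carries, so the or and the add
// compute the same value and the add cannot wrap.
#include <cstdio>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b)
      if ((a & b) == 0 && (a | b) != a + b) {
        std::printf("mismatch: a=%u b=%u\n", a, b);
        return 1;
      }
  std::printf("a | b == a + b whenever a & b == 0\n");
  return 0;
}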
@ -5329,7 +5369,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// using an add, which is equivalent, and re-apply the zext.
APInt Trunc = CI->getValue().trunc(Z0TySize);
if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
Trunc.isSignBit())
Trunc.isSignMask())
return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
UTy);
}


@ -292,15 +292,15 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
KnownOne = PossibleSumOne & Known;
// Are we still trying to solve for the sign bit?
if (!Known.isNegative()) {
if (!Known.isSignBitSet()) {
if (NSW) {
// Adding two non-negative numbers, or subtracting a negative number from
// a non-negative one, can't wrap into negative.
if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
if (LHSKnownZero.isSignBitSet() && KnownZero2.isSignBitSet())
KnownZero.setSignBit();
// Adding two negative numbers, or subtracting a non-negative number from
// a negative one, can't wrap into non-negative.
else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
else if (LHSKnownOne.isSignBitSet() && KnownOne2.isSignBitSet())
KnownOne.setSignBit();
}
}
@ -322,10 +322,10 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
// The product of a number with itself is non-negative.
isKnownNonNegative = true;
} else {
bool isKnownNonNegativeOp1 = KnownZero.isNegative();
bool isKnownNonNegativeOp0 = KnownZero2.isNegative();
bool isKnownNegativeOp1 = KnownOne.isNegative();
bool isKnownNegativeOp0 = KnownOne2.isNegative();
bool isKnownNonNegativeOp1 = KnownZero.isSignBitSet();
bool isKnownNonNegativeOp0 = KnownZero2.isSignBitSet();
bool isKnownNegativeOp1 = KnownOne.isSignBitSet();
bool isKnownNegativeOp0 = KnownOne2.isSignBitSet();
// The product of two numbers with the same sign is non-negative.
isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
(isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
@ -361,9 +361,9 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
// which case we prefer to follow the result of the direct computation,
// though as the program is invoking undefined behaviour we can choose
// whatever we like here.
if (isKnownNonNegative && !KnownOne.isNegative())
if (isKnownNonNegative && !KnownOne.isSignBitSet())
KnownZero.setSignBit();
else if (isKnownNegative && !KnownZero.isNegative())
else if (isKnownNegative && !KnownZero.isSignBitSet())
KnownOne.setSignBit();
}
@ -661,8 +661,10 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
// bits in V shifted to the right by C.
KnownZero |= RHSKnownZero.lshr(C->getZExtValue());
KnownOne |= RHSKnownOne.lshr(C->getZExtValue());
RHSKnownZero.lshrInPlace(C->getZExtValue());
KnownZero |= RHSKnownZero;
RHSKnownOne.lshrInPlace(C->getZExtValue());
KnownOne |= RHSKnownOne;
// assume(~(v << c) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
m_Value(A))) &&
@ -672,8 +674,10 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
// to known bits in V shifted to the right by C.
KnownZero |= RHSKnownOne.lshr(C->getZExtValue());
KnownOne |= RHSKnownZero.lshr(C->getZExtValue());
RHSKnownOne.lshrInPlace(C->getZExtValue());
KnownZero |= RHSKnownOne;
RHSKnownZero.lshrInPlace(C->getZExtValue());
KnownOne |= RHSKnownZero;
// assume(v >> c = a)
} else if (match(Arg,
m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
@ -707,7 +711,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
if (RHSKnownZero.isNegative()) {
if (RHSKnownZero.isSignBitSet()) {
// We know that the sign bit is zero.
KnownZero.setSignBit();
}
@ -718,7 +722,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) {
if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isSignBitSet()) {
// We know that the sign bit is zero.
KnownZero.setSignBit();
}
@ -729,7 +733,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
if (RHSKnownOne.isNegative()) {
if (RHSKnownOne.isSignBitSet()) {
// We know that the sign bit is one.
KnownOne.setSignBit();
}
@ -740,7 +744,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) {
if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isSignBitSet()) {
// We know that the sign bit is one.
KnownOne.setSignBit();
}
@ -990,23 +994,23 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
unsigned MaxHighZeros = 0;
if (SPF == SPF_SMAX) {
// If both sides are negative, the result is negative.
if (KnownOne.isNegative() && KnownOne2.isNegative())
if (KnownOne.isSignBitSet() && KnownOne2.isSignBitSet())
// We can derive a lower bound on the result by taking the max of the
// leading one bits.
MaxHighOnes =
std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes());
// If either side is non-negative, the result is non-negative.
else if (KnownZero.isNegative() || KnownZero2.isNegative())
else if (KnownZero.isSignBitSet() || KnownZero2.isSignBitSet())
MaxHighZeros = 1;
} else if (SPF == SPF_SMIN) {
// If both sides are non-negative, the result is non-negative.
if (KnownZero.isNegative() && KnownZero2.isNegative())
if (KnownZero.isSignBitSet() && KnownZero2.isSignBitSet())
// We can derive an upper bound on the result by taking the max of the
// leading zero bits.
MaxHighZeros = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
// If either side is negative, the result is negative.
else if (KnownOne.isNegative() || KnownOne2.isNegative())
else if (KnownOne.isSignBitSet() || KnownOne2.isSignBitSet())
MaxHighOnes = 1;
} else if (SPF == SPF_UMAX) {
// We can derive a lower bound on the result by taking the max of the
@ -1092,14 +1096,14 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
KZResult.setLowBits(ShiftAmt); // Low bits known 0.
// If this shift has "nsw" keyword, then the result is either a poison
// value or has the same sign bit as the first operand.
if (NSW && KnownZero.isNegative())
if (NSW && KnownZero.isSignBitSet())
KZResult.setSignBit();
return KZResult;
};
auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) {
APInt KOResult = KnownOne << ShiftAmt;
if (NSW && KnownOne.isNegative())
if (NSW && KnownOne.isSignBitSet())
KOResult.setSignBit();
return KOResult;
};
@ -1111,10 +1115,11 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
}
case Instruction::LShr: {
// (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
return KnownZero.lshr(ShiftAmt) |
// High bits known zero.
APInt::getHighBitsSet(BitWidth, ShiftAmt);
auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
APInt KZResult = KnownZero.lshr(ShiftAmt);
// High bits known zero.
KZResult.setHighBits(ShiftAmt);
return KZResult;
};
auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
@ -1169,28 +1174,25 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// If the first operand is non-negative or has all low bits zero, then
// the upper bits are all zero.
if (KnownZero2.isNegative() || ((KnownZero2 & LowBits) == LowBits))
if (KnownZero2.isSignBitSet() || ((KnownZero2 & LowBits) == LowBits))
KnownZero |= ~LowBits;
// If the first operand is negative and not all low bits are zero, then
// the upper bits are all one.
if (KnownOne2.isNegative() && ((KnownOne2 & LowBits) != 0))
if (KnownOne2.isSignBitSet() && ((KnownOne2 & LowBits) != 0))
KnownOne |= ~LowBits;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
break;
}
}
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero.
if (KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
Q);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
KnownZero.setSignBit();
}
computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
// If it's known zero, our sign bit is also zero.
if (KnownZero2.isSignBitSet())
KnownZero.setSignBit();
break;
case Instruction::URem: {
@ -1331,24 +1333,24 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// (add non-negative, non-negative) --> non-negative
// (add negative, negative) --> negative
if (Opcode == Instruction::Add) {
if (KnownZero2.isNegative() && KnownZero3.isNegative())
if (KnownZero2.isSignBitSet() && KnownZero3.isSignBitSet())
KnownZero.setSignBit();
else if (KnownOne2.isNegative() && KnownOne3.isNegative())
else if (KnownOne2.isSignBitSet() && KnownOne3.isSignBitSet())
KnownOne.setSignBit();
}
// (sub nsw non-negative, negative) --> non-negative
// (sub nsw negative, non-negative) --> negative
else if (Opcode == Instruction::Sub && LL == I) {
if (KnownZero2.isNegative() && KnownOne3.isNegative())
if (KnownZero2.isSignBitSet() && KnownOne3.isSignBitSet())
KnownZero.setSignBit();
else if (KnownOne2.isNegative() && KnownZero3.isNegative())
else if (KnownOne2.isSignBitSet() && KnownZero3.isSignBitSet())
KnownOne.setSignBit();
}
// (mul nsw non-negative, non-negative) --> non-negative
else if (Opcode == Instruction::Mul && KnownZero2.isNegative() &&
KnownZero3.isNegative())
else if (Opcode == Instruction::Mul && KnownZero2.isSignBitSet() &&
KnownZero3.isSignBitSet())
KnownZero.setSignBit();
}
@ -1614,8 +1616,8 @@ void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
APInt ZeroBits(BitWidth, 0);
APInt OneBits(BitWidth, 0);
computeKnownBits(V, ZeroBits, OneBits, Depth, Q);
KnownOne = OneBits.isNegative();
KnownZero = ZeroBits.isNegative();
KnownOne = OneBits.isSignBitSet();
KnownZero = ZeroBits.isSignBitSet();
}
/// Return true if the given value is known to have exactly one
@ -1638,9 +1640,9 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
if (match(V, m_Shl(m_One(), m_Value())))
return true;
// (signbit) >>l X is clearly a power of two if the one is not shifted off the
// bottom. If it is shifted off the bottom then the result is undefined.
if (match(V, m_LShr(m_SignBit(), m_Value())))
// (signmask) >>l X is clearly a power of two if the one is not shifted off
// the bottom. If it is shifted off the bottom then the result is undefined.
if (match(V, m_LShr(m_SignMask(), m_Value())))
return true;
// The remaining tests are all recursive, so bail out if we hit the limit.
@ -2241,7 +2243,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// If we are subtracting one from a positive number, there is no carry
// out of the result.
if (KnownZero.isNegative())
if (KnownZero.isSignBitSet())
return Tmp;
}
@ -2265,7 +2267,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// If the input is known to be positive (the sign bit is known clear),
// the output of the NEG has the same number of sign bits as the input.
if (KnownZero.isNegative())
if (KnownZero.isSignBitSet())
return Tmp2;
// Otherwise, we treat this like a SUB.
@ -2322,10 +2324,10 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// If we know that the sign bit is either zero or one, determine the number of
// identical bits in the top of the input value.
if (KnownZero.isNegative())
if (KnownZero.isSignBitSet())
return std::max(FirstAnswer, KnownZero.countLeadingOnes());
if (KnownOne.isNegative())
if (KnownOne.isSignBitSet())
return std::max(FirstAnswer, KnownOne.countLeadingOnes());
// computeKnownBits gave us no extra information about the top bits.
@ -3556,14 +3558,14 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
// We know the multiply operation doesn't overflow if the maximum values for
// each operand will not overflow after we multiply them together.
bool MaxOverflow;
LHSMax.umul_ov(RHSMax, MaxOverflow);
(void)LHSMax.umul_ov(RHSMax, MaxOverflow);
if (!MaxOverflow)
return OverflowResult::NeverOverflows;
// We know it always overflows if multiplying the smallest possible values for
// the operands also results in overflow.
bool MinOverflow;
LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow);
(void)LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow);
if (MinOverflow)
return OverflowResult::AlwaysOverflows;
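// A standalone analogue of APInt::umul_ov's overflow flag for 32-bit
// operands (only the flag is used at the call sites above; the product is
// discarded, hence the (void) casts): widen to 64 bits and compare.
#include <cstdint>
#include <cstdio>

static bool umulOverflows(uint32_t a, uint32_t b) {
  uint64_t wide = uint64_t(a) * uint64_t(b);
  return wide > UINT32_MAX;
}

int main() {
  std::printf("%d\n", umulOverflows(0x10000u, 0x10000u)); // 1: 2^32 overflows
  std::printf("%d\n", umulOverflows(0xFFFFu, 0x10001u));  // 0: exactly 2^32-1
  return 0;
}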


@ -143,27 +143,24 @@ bool LLParser::ValidateEndOfModule() {
FnAttrs.removeAttribute(Attribute::Alignment);
}
AS = AS.addAttributes(
Context, AttributeList::FunctionIndex,
AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs));
AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
AttributeSet::get(Context, FnAttrs));
Fn->setAttributes(AS);
} else if (CallInst *CI = dyn_cast<CallInst>(V)) {
AttributeList AS = CI->getAttributes();
AttrBuilder FnAttrs(AS.getFnAttributes());
AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
FnAttrs.merge(B);
AS = AS.addAttributes(
Context, AttributeList::FunctionIndex,
AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs));
AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
AttributeSet::get(Context, FnAttrs));
CI->setAttributes(AS);
} else if (InvokeInst *II = dyn_cast<InvokeInst>(V)) {
AttributeList AS = II->getAttributes();
AttrBuilder FnAttrs(AS.getFnAttributes());
AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
FnAttrs.merge(B);
AS = AS.addAttributes(
Context, AttributeList::FunctionIndex,
AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs));
AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
AttributeSet::get(Context, FnAttrs));
II->setAttributes(AS);
} else {
llvm_unreachable("invalid object with forward attribute group reference");


@ -372,15 +372,27 @@ Expected<std::string> readTriple(BitstreamCursor &Stream) {
class BitcodeReaderBase {
protected:
BitcodeReaderBase(BitstreamCursor Stream) : Stream(std::move(Stream)) {
BitcodeReaderBase(BitstreamCursor Stream, StringRef Strtab)
: Stream(std::move(Stream)), Strtab(Strtab) {
this->Stream.setBlockInfo(&BlockInfo);
}
BitstreamBlockInfo BlockInfo;
BitstreamCursor Stream;
StringRef Strtab;
/// In version 2 of the bitcode we store names of global values and comdats in
/// a string table rather than in the VST.
bool UseStrtab = false;
Expected<unsigned> parseVersionRecord(ArrayRef<uint64_t> Record);
/// If this module uses a string table, pop the reference to the string table
/// and return the referenced string and the rest of the record. Otherwise
/// just return the record itself.
std::pair<StringRef, ArrayRef<uint64_t>>
readNameFromStrtab(ArrayRef<uint64_t> Record);
bool readBlockInfo();
// Contains an arbitrary and optional string identifying the bitcode producer
@ -402,11 +414,22 @@ BitcodeReaderBase::parseVersionRecord(ArrayRef<uint64_t> Record) {
if (Record.size() < 1)
return error("Invalid record");
unsigned ModuleVersion = Record[0];
if (ModuleVersion > 1)
if (ModuleVersion > 2)
return error("Invalid value");
UseStrtab = ModuleVersion >= 2;
return ModuleVersion;
}
std::pair<StringRef, ArrayRef<uint64_t>>
BitcodeReaderBase::readNameFromStrtab(ArrayRef<uint64_t> Record) {
if (!UseStrtab)
return {"", Record};
// Invalid reference. Let the caller complain about the record being empty.
if (Record[0] + Record[1] > Strtab.size())
return {"", {}};
return {StringRef(Strtab.data() + Record[0], Record[1]), Record.slice(2)};
}
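// A self-contained sketch of the v2 record layout handled here: the first
// two operands are (offset, size) into a shared string table, and the rest
// of the record follows. Out-of-range references produce an empty name,
// matching the reader's behavior above.
#include <cstdint>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

static std::pair<std::string, std::vector<uint64_t>>
readName(const std::string &Strtab, std::vector<uint64_t> Record) {
  if (Record.size() < 2 || Record[0] + Record[1] > Strtab.size())
    return {"", {}};
  std::string Name = Strtab.substr(Record[0], Record[1]);
  Record.erase(Record.begin(), Record.begin() + 2);
  return {Name, Record};
}

int main() {
  std::string Strtab = "mainfoobar"; // hypothetical string table blob
  auto Result = readName(Strtab, {4, 6, 42});
  std::printf("name=%s rest[0]=%llu\n", Result.first.c_str(),
              (unsigned long long)Result.second[0]); // name=foobar rest[0]=42
  return 0;
}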
class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
LLVMContext &Context;
Module *TheModule = nullptr;
@ -492,8 +515,8 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
std::vector<std::string> BundleTags;
public:
BitcodeReader(BitstreamCursor Stream, StringRef ProducerIdentification,
LLVMContext &Context);
BitcodeReader(BitstreamCursor Stream, StringRef Strtab,
StringRef ProducerIdentification, LLVMContext &Context);
Error materializeForwardReferencedFunctions();
@ -628,7 +651,10 @@ private:
Expected<Value *> recordValue(SmallVectorImpl<uint64_t> &Record,
unsigned NameIndex, Triple &TT);
void setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, Function *F,
ArrayRef<uint64_t> Record);
Error parseValueSymbolTable(uint64_t Offset = 0);
Error parseGlobalValueSymbolTable();
Error parseConstants();
Error rememberAndSkipFunctionBodies();
Error rememberAndSkipFunctionBody();
@ -681,12 +707,15 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
std::string SourceFileName;
public:
ModuleSummaryIndexBitcodeReader(
BitstreamCursor Stream, ModuleSummaryIndex &TheIndex);
ModuleSummaryIndexBitcodeReader(BitstreamCursor Stream, StringRef Strtab,
ModuleSummaryIndex &TheIndex);
Error parseModule(StringRef ModulePath);
private:
void setValueGUID(uint64_t ValueID, StringRef ValueName,
GlobalValue::LinkageTypes Linkage,
StringRef SourceFileName);
Error parseValueSymbolTable(
uint64_t Offset,
DenseMap<unsigned, GlobalValue::LinkageTypes> &ValueIdToLinkageMap);
@ -716,10 +745,10 @@ std::error_code llvm::errorToErrorCodeAndEmitErrors(LLVMContext &Ctx,
return std::error_code();
}
BitcodeReader::BitcodeReader(BitstreamCursor Stream,
BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab,
StringRef ProducerIdentification,
LLVMContext &Context)
: BitcodeReaderBase(std::move(Stream)), Context(Context),
: BitcodeReaderBase(std::move(Stream), Strtab), Context(Context),
ValueList(Context) {
this->ProducerIdentification = ProducerIdentification;
}
@ -1749,6 +1778,54 @@ static uint64_t jumpToValueSymbolTable(uint64_t Offset,
return CurrentBit;
}
void BitcodeReader::setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta,
Function *F,
ArrayRef<uint64_t> Record) {
// Note that we subtract 1 here because the offset is relative to one word
// before the start of the identification or module block, which was
// historically always the start of the regular bitcode header.
uint64_t FuncWordOffset = Record[1] - 1;
uint64_t FuncBitOffset = FuncWordOffset * 32;
DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta;
// Set the LastFunctionBlockBit to point to the last function block.
// Later when parsing is resumed after function materialization,
// we can simply skip that last function block.
if (FuncBitOffset > LastFunctionBlockBit)
LastFunctionBlockBit = FuncBitOffset;
}
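// The offset arithmetic in use here, with hypothetical values: a VST record
// stores a 32-bit-word offset relative to one word before the start of the
// identification/module block, so the absolute bit position is
// (word offset - 1) * 32 plus the block's bit delta.
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t RecordWordOffset = 10;       // hypothetical VST record operand
  uint64_t FuncBitcodeOffsetDelta = 64; // hypothetical header bit delta
  uint64_t FuncBitOffset = (RecordWordOffset - 1) * 32;
  std::printf("deferred bit offset: %llu\n",
              (unsigned long long)(FuncBitOffset + FuncBitcodeOffsetDelta));
  return 0;
}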
/// Read a new-style GlobalValue symbol table.
Error BitcodeReader::parseGlobalValueSymbolTable() {
unsigned FuncBitcodeOffsetDelta =
Stream.getAbbrevIDWidth() + bitc::BlockIDWidth;
if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
return error("Invalid record");
SmallVector<uint64_t, 64> Record;
while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock:
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
return Error::success();
case BitstreamEntry::Record:
break;
}
Record.clear();
switch (Stream.readRecord(Entry.ID, Record)) {
case bitc::VST_CODE_FNENTRY: // [valueid, offset]
setDeferredFunctionInfo(FuncBitcodeOffsetDelta,
cast<Function>(ValueList[Record[0]]), Record);
break;
}
}
}
/// Parse the value symbol table at either the current parsing location or
/// at the given bit offset if provided.
Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
@ -1756,8 +1833,18 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
// Pass in the Offset to distinguish between calling for the module-level
// VST (where we want to jump to the VST offset) and the function-level
// VST (where we don't).
if (Offset > 0)
if (Offset > 0) {
CurrentBit = jumpToValueSymbolTable(Offset, Stream);
// If this module uses a string table, read this as a module-level VST.
if (UseStrtab) {
if (Error Err = parseGlobalValueSymbolTable())
return Err;
Stream.JumpToBit(CurrentBit);
return Error::success();
}
// Otherwise, the VST will be in a similar format to a function-level VST,
// and will contain symbol names.
}
// Compute the delta between the bitcode indices in the VST (the word offset
// to the word-aligned ENTER_SUBBLOCK for the function block, and that
@ -1818,23 +1905,10 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
return Err;
Value *V = ValOrErr.get();
auto *F = dyn_cast<Function>(V);
// Ignore function offsets emitted for aliases of functions in older
// versions of LLVM.
if (!F)
break;
// Note that we subtract 1 here because the offset is relative to one word
// before the start of the identification or module block, which was
// historically always the start of the regular bitcode header.
uint64_t FuncWordOffset = Record[1] - 1;
uint64_t FuncBitOffset = FuncWordOffset * 32;
DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta;
// Set the LastFunctionBlockBit to point to the last function block.
// Later when parsing is resumed after function materialization,
// we can simply skip that last function block.
if (FuncBitOffset > LastFunctionBlockBit)
LastFunctionBlockBit = FuncBitOffset;
if (auto *F = dyn_cast<Function>(V))
setDeferredFunctionInfo(FuncBitcodeOffsetDelta, F, Record);
break;
}
case bitc::VST_CODE_BBENTRY: {
@ -2557,6 +2631,7 @@ Error BitcodeReader::globalCleanup() {
// Look for intrinsic functions which need to be upgraded at some point
for (Function &F : *TheModule) {
MDLoader->upgradeDebugIntrinsics(F);
Function *NewFn;
if (UpgradeIntrinsicFunction(&F, NewFn))
UpgradedIntrinsics[&F] = NewFn;
@ -2626,15 +2701,24 @@ bool BitcodeReaderBase::readBlockInfo() {
}
Error BitcodeReader::parseComdatRecord(ArrayRef<uint64_t> Record) {
// [selection_kind, name]
if (Record.size() < 2)
// v1: [selection_kind, name]
// v2: [strtab_offset, strtab_size, selection_kind]
StringRef Name;
std::tie(Name, Record) = readNameFromStrtab(Record);
if (Record.size() < 1)
return error("Invalid record");
Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]);
std::string Name;
unsigned ComdatNameSize = Record[1];
Name.reserve(ComdatNameSize);
for (unsigned i = 0; i != ComdatNameSize; ++i)
Name += (char)Record[2 + i];
std::string OldFormatName;
if (!UseStrtab) {
if (Record.size() < 2)
return error("Invalid record");
unsigned ComdatNameSize = Record[1];
OldFormatName.reserve(ComdatNameSize);
for (unsigned i = 0; i != ComdatNameSize; ++i)
OldFormatName += (char)Record[2 + i];
Name = OldFormatName;
}
Comdat *C = TheModule->getOrInsertComdat(Name);
C->setSelectionKind(SK);
ComdatList.push_back(C);
@ -2642,9 +2726,13 @@ Error BitcodeReader::parseComdatRecord(ArrayRef<uint64_t> Record) {
}
Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
// [pointer type, isconst, initid, linkage, alignment, section,
// v1: [pointer type, isconst, initid, linkage, alignment, section,
// visibility, threadlocal, unnamed_addr, externally_initialized,
// dllstorageclass, comdat]
// dllstorageclass, comdat] (name in VST)
// v2: [strtab_offset, strtab_size, v1]
StringRef Name;
std::tie(Name, Record) = readNameFromStrtab(Record);
if (Record.size() < 6)
return error("Invalid record");
Type *Ty = getTypeByID(Record[0]);
@ -2692,7 +2780,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
ExternallyInitialized = Record[9];
GlobalVariable *NewGV =
new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, "",
new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, Name,
nullptr, TLM, AddressSpace, ExternallyInitialized);
NewGV->setAlignment(Alignment);
if (!Section.empty())
@ -2724,9 +2812,13 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
}
Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
// [type, callingconv, isproto, linkage, paramattr, alignment, section,
// v1: [type, callingconv, isproto, linkage, paramattr, alignment, section,
// visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat,
// prefixdata]
// prefixdata] (name in VST)
// v2: [strtab_offset, strtab_size, v1]
StringRef Name;
std::tie(Name, Record) = readNameFromStrtab(Record);
if (Record.size() < 8)
return error("Invalid record");
Type *Ty = getTypeByID(Record[0]);
@ -2742,7 +2834,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
return error("Invalid calling convention ID");
Function *Func =
Function::Create(FTy, GlobalValue::ExternalLinkage, "", TheModule);
Function::Create(FTy, GlobalValue::ExternalLinkage, Name, TheModule);
Func->setCallingConv(CC);
bool isProto = Record[2];
@ -2810,11 +2902,15 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
Error BitcodeReader::parseGlobalIndirectSymbolRecord(
unsigned BitCode, ArrayRef<uint64_t> Record) {
// ALIAS_OLD: [alias type, aliasee val#, linkage]
// ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility,
// dllstorageclass]
// IFUNC: [alias type, addrspace, aliasee val#, linkage,
// visibility, dllstorageclass]
// v1 ALIAS_OLD: [alias type, aliasee val#, linkage] (name in VST)
// v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility,
// dllstorageclass] (name in VST)
// v1 IFUNC: [alias type, addrspace, aliasee val#, linkage,
// visibility, dllstorageclass] (name in VST)
// v2: [strtab_offset, strtab_size, v1]
StringRef Name;
std::tie(Name, Record) = readNameFromStrtab(Record);
bool NewRecord = BitCode != bitc::MODULE_CODE_ALIAS_OLD;
if (Record.size() < (3 + (unsigned)NewRecord))
return error("Invalid record");
@ -2839,10 +2935,10 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
GlobalIndirectSymbol *NewGA;
if (BitCode == bitc::MODULE_CODE_ALIAS ||
BitCode == bitc::MODULE_CODE_ALIAS_OLD)
NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), "",
NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name,
TheModule);
else
NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), "",
NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name,
nullptr, TheModule);
// Old bitcode files didn't have visibility field.
// Local linkage must have default visibility.
@ -4570,8 +4666,8 @@ std::vector<StructType *> BitcodeReader::getIdentifiedStructTypes() const {
}
ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader(
BitstreamCursor Cursor, ModuleSummaryIndex &TheIndex)
: BitcodeReaderBase(std::move(Cursor)), TheIndex(TheIndex) {}
BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex)
: BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex) {}
std::pair<GlobalValue::GUID, GlobalValue::GUID>
ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) {
@ -4580,12 +4676,32 @@ ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) {
return VGI->second;
}
void ModuleSummaryIndexBitcodeReader::setValueGUID(
uint64_t ValueID, StringRef ValueName, GlobalValue::LinkageTypes Linkage,
StringRef SourceFileName) {
std::string GlobalId =
GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName);
auto ValueGUID = GlobalValue::getGUID(GlobalId);
auto OriginalNameID = ValueGUID;
if (GlobalValue::isLocalLinkage(Linkage))
OriginalNameID = GlobalValue::getGUID(ValueName);
if (PrintSummaryGUIDs)
dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is "
<< ValueName << "\n";
ValueIdToCallGraphGUIDMap[ValueID] =
std::make_pair(ValueGUID, OriginalNameID);
}
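// Worked example (illustrative): for a static function "f" defined in "a.c",
//
//   std::string Id = GlobalValue::getGlobalIdentifier(
//       "f", GlobalValue::InternalLinkage, /*FileName=*/"a.c"); // "a.c:f"
//   GlobalValue::GUID G = GlobalValue::getGUID(Id); // 64-bit hash of "a.c:f"
//
// so two internal functions named "f" in different files get distinct GUIDs,
// while OriginalNameID keeps getGUID("f") for matching profile data.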
// Specialized value symbol table parser used when reading module index
// blocks where we don't actually create global values. The parsed information
// is saved in the bitcode reader for use when later parsing summaries.
Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
uint64_t Offset,
DenseMap<unsigned, GlobalValue::LinkageTypes> &ValueIdToLinkageMap) {
// With a strtab the VST is not required to parse the summary.
if (UseStrtab)
return Error::success();
assert(Offset > 0 && "Expected non-zero VST offset");
uint64_t CurrentBit = jumpToValueSymbolTable(Offset, Stream);
@ -4627,17 +4743,7 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
assert(VLI != ValueIdToLinkageMap.end() &&
"No linkage found for VST entry?");
auto Linkage = VLI->second;
std::string GlobalId =
GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName);
auto ValueGUID = GlobalValue::getGUID(GlobalId);
auto OriginalNameID = ValueGUID;
if (GlobalValue::isLocalLinkage(Linkage))
OriginalNameID = GlobalValue::getGUID(ValueName);
if (PrintSummaryGUIDs)
dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is "
<< ValueName << "\n";
ValueIdToCallGraphGUIDMap[ValueID] =
std::make_pair(ValueGUID, OriginalNameID);
setValueGUID(ValueID, ValueName, Linkage, SourceFileName);
ValueName.clear();
break;
}
@ -4651,18 +4757,7 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
assert(VLI != ValueIdToLinkageMap.end() &&
"No linkage found for VST entry?");
auto Linkage = VLI->second;
std::string FunctionGlobalId = GlobalValue::getGlobalIdentifier(
ValueName, VLI->second, SourceFileName);
auto FunctionGUID = GlobalValue::getGUID(FunctionGlobalId);
auto OriginalNameID = FunctionGUID;
if (GlobalValue::isLocalLinkage(Linkage))
OriginalNameID = GlobalValue::getGUID(ValueName);
if (PrintSummaryGUIDs)
dbgs() << "GUID " << FunctionGUID << "(" << OriginalNameID << ") is "
<< ValueName << "\n";
ValueIdToCallGraphGUIDMap[ValueID] =
std::make_pair(FunctionGUID, OriginalNameID);
setValueGUID(ValueID, ValueName, Linkage, SourceFileName);
ValueName.clear();
break;
}
@ -4749,6 +4844,11 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) {
switch (BitCode) {
default:
break; // Default behavior, ignore unknown content.
case bitc::MODULE_CODE_VERSION: {
if (Error Err = parseVersionRecord(Record).takeError())
return Err;
break;
}
/// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
case bitc::MODULE_CODE_SOURCE_FILENAME: {
SmallString<128> ValueName;
@ -4783,17 +4883,26 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) {
// was historically always the start of the regular bitcode header.
VSTOffset = Record[0] - 1;
break;
// GLOBALVAR: [pointer type, isconst, initid, linkage, ...]
// FUNCTION: [type, callingconv, isproto, linkage, ...]
// ALIAS: [alias type, addrspace, aliasee val#, linkage, ...]
// v1 GLOBALVAR: [pointer type, isconst, initid, linkage, ...]
// v1 FUNCTION: [type, callingconv, isproto, linkage, ...]
// v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, ...]
// v2: [strtab offset, strtab size, v1]
case bitc::MODULE_CODE_GLOBALVAR:
case bitc::MODULE_CODE_FUNCTION:
case bitc::MODULE_CODE_ALIAS: {
if (Record.size() <= 3)
StringRef Name;
ArrayRef<uint64_t> GVRecord;
std::tie(Name, GVRecord) = readNameFromStrtab(Record);
if (GVRecord.size() <= 3)
return error("Invalid record");
uint64_t RawLinkage = Record[3];
uint64_t RawLinkage = GVRecord[3];
GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage);
ValueIdToLinkageMap[ValueId++] = Linkage;
if (!UseStrtab) {
ValueIdToLinkageMap[ValueId++] = Linkage;
break;
}
setValueGUID(ValueId++, Name, Linkage, SourceFileName);
break;
}
}
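// Illustration (made-up operands): a v2 FUNCTION record
//   [42, 3, type, callingconv, isproto, linkage, ...]
// yields Name = Strtab.substr(42, 3) and Linkage = GVRecord[3]. With a
// string table the GUID can be assigned right here; without one, only the
// linkage is recorded and the GUID is filled in later from the VST.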
@ -4904,6 +5013,12 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
switch (BitCode) {
default: // Default behavior: ignore.
break;
case bitc::FS_VALUE_GUID: { // [valueid, refguid]
uint64_t ValueID = Record[0];
GlobalValue::GUID RefGUID = Record[1];
ValueIdToCallGraphGUIDMap[ValueID] = std::make_pair(RefGUID, RefGUID);
break;
}
// FS_PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid,
// n x (valueid)]
// FS_PERMODULE_PROFILE: [valueid, flags, instcount, numrefs,
@ -5208,6 +5323,35 @@ const std::error_category &llvm::BitcodeErrorCategory() {
return *ErrorCategory;
}
static Expected<StringRef> readStrtab(BitstreamCursor &Stream) {
if (Stream.EnterSubBlock(bitc::STRTAB_BLOCK_ID))
return error("Invalid record");
StringRef Strtab;
while (1) {
BitstreamEntry Entry = Stream.advance();
switch (Entry.Kind) {
case BitstreamEntry::EndBlock:
return Strtab;
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::SubBlock:
if (Stream.SkipBlock())
return error("Malformed block");
break;
case BitstreamEntry::Record:
StringRef Blob;
SmallVector<uint64_t, 1> Record;
if (Stream.readRecord(Entry.ID, Record, &Blob) == bitc::STRTAB_BLOB)
Strtab = Blob;
break;
}
}
}
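// The blob is simply the raw bytes of the string table; records address it
// with (offset, size) pairs, so names need no terminators. Illustrative
// layout (made-up names and offsets):
//   blob = "foobarbaz"
//   "foo" -> (offset 0, size 3), "bar" -> (3, 3), "baz" -> (6, 3)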
//===----------------------------------------------------------------------===//
// External interface
//===----------------------------------------------------------------------===//
@ -5260,6 +5404,22 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
continue;
}
if (Entry.ID == bitc::STRTAB_BLOCK_ID) {
Expected<StringRef> Strtab = readStrtab(Stream);
if (!Strtab)
return Strtab.takeError();
// This string table is used by every preceding bitcode module that does
// not have its own string table. A bitcode file may have multiple
// string tables if it was created by binary concatenation, for example
// with "llvm-cat -b".
for (auto I = Modules.rbegin(), E = Modules.rend(); I != E; ++I) {
if (!I->Strtab.empty())
break;
I->Strtab = *Strtab;
}
continue;
}
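// Sketch of the backfill above for concatenated files:
//   [module A][module B][strtab AB]          -> one table serves A and B
//   [module A][strtab A][module B][strtab B] -> the reverse walk assigns
//       strtab B to module B only, stopping at A, which already has a table.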
if (Stream.SkipBlock())
return error("Malformed block");
continue;
@ -5296,8 +5456,8 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
}
Stream.JumpToBit(ModuleBit);
auto *R =
new BitcodeReader(std::move(Stream), ProducerIdentification, Context);
auto *R = new BitcodeReader(std::move(Stream), Strtab, ProducerIdentification,
Context);
std::unique_ptr<Module> M =
llvm::make_unique<Module>(ModuleIdentifier, Context);
@ -5332,7 +5492,7 @@ Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
Stream.JumpToBit(ModuleBit);
auto Index = llvm::make_unique<ModuleSummaryIndex>();
ModuleSummaryIndexBitcodeReader R(std::move(Stream), *Index);
ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index);
if (Error Err = R.parseModule(ModuleIdentifier))
return std::move(Err);

View File

@ -54,6 +54,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
@ -452,6 +453,7 @@ class MetadataLoader::MetadataLoaderImpl {
bool StripTBAA = false;
bool HasSeenOldLoopTags = false;
bool NeedUpgradeToDIGlobalVariableExpression = false;
bool NeedDeclareExpressionUpgrade = false;
/// True if metadata is being parsed for a module being ThinLTO imported.
bool IsImporting = false;
@ -511,6 +513,26 @@ class MetadataLoader::MetadataLoaderImpl {
}
}
/// Remove a leading DW_OP_deref from DIExpressions in a dbg.declare that
/// describes a function argument.
void upgradeDeclareExpressions(Function &F) {
if (!NeedDeclareExpressionUpgrade)
return;
for (auto &BB : F)
for (auto &I : BB)
if (auto *DDI = dyn_cast<DbgDeclareInst>(&I))
if (auto *DIExpr = DDI->getExpression())
if (DIExpr->startsWithDeref() &&
dyn_cast_or_null<Argument>(DDI->getAddress())) {
SmallVector<uint64_t, 8> Ops;
Ops.append(std::next(DIExpr->elements_begin()),
DIExpr->elements_end());
auto *E = DIExpression::get(Context, Ops);
DDI->setOperand(2, MetadataAsValue::get(Context, E));
}
}
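// Sketch of the upgrade in IR terms (illustrative metadata numbers):
//   call void @llvm.dbg.declare(metadata i32* %arg, metadata !12,
//                               metadata !DIExpression(DW_OP_deref))
// becomes
//   call void @llvm.dbg.declare(metadata i32* %arg, metadata !12,
//                               metadata !DIExpression())
// when %arg is a function argument: dbg.declare already describes a memory
// location, so the leading deref is redundant under the new semantics.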
void upgradeDebugInfo() {
upgradeCUSubprograms();
upgradeCUVariables();
@ -565,6 +587,7 @@ public:
unsigned size() const { return MetadataList.size(); }
void shrinkTo(unsigned N) { MetadataList.shrinkTo(N); }
void upgradeDebugIntrinsics(Function &F) { upgradeDeclareExpressions(F); }
};
static Error error(const Twine &Message) {
@ -1520,12 +1543,32 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
return error("Invalid record");
IsDistinct = Record[0] & 1;
bool HasOpFragment = Record[0] & 2;
uint64_t Version = Record[0] >> 1;
auto Elts = MutableArrayRef<uint64_t>(Record).slice(1);
if (!HasOpFragment)
if (unsigned N = Elts.size())
if (N >= 3 && Elts[N - 3] == dwarf::DW_OP_bit_piece)
Elts[N - 3] = dwarf::DW_OP_LLVM_fragment;
unsigned N = Elts.size();
// Perform various upgrades.
switch (Version) {
case 0:
if (N >= 3 && Elts[N - 3] == dwarf::DW_OP_bit_piece)
Elts[N - 3] = dwarf::DW_OP_LLVM_fragment;
LLVM_FALLTHROUGH;
case 1:
// Move DW_OP_deref to the end.
if (N && Elts[0] == dwarf::DW_OP_deref) {
auto End = Elts.end();
if (Elts.size() >= 3 && *std::prev(End, 3) == dwarf::DW_OP_LLVM_fragment)
End = std::prev(End, 3);
std::move(std::next(Elts.begin()), End, Elts.begin());
*std::prev(End) = dwarf::DW_OP_deref;
}
NeedDeclareExpressionUpgrade = true;
LLVM_FALLTHROUGH;
case 2:
// Up-to-date!
break;
default:
return error("Invalid record");
}
MetadataList.assignValue(
GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))),
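// Worked example of the upgrades above (made-up operand values):
//   version 0: a trailing {DW_OP_bit_piece, A, B} is renamed in place to
//              {DW_OP_LLVM_fragment, A, B}; the operands carry over as-is.
//   version 1: a leading deref is rotated to the end, staying in front of
//              any fragment:
//              {DW_OP_deref, DW_OP_plus, 8}
//                -> {DW_OP_plus, 8, DW_OP_deref}
//              {DW_OP_deref, DW_OP_plus, 8, DW_OP_LLVM_fragment, 0, 32}
//                -> {DW_OP_plus, 8, DW_OP_deref, DW_OP_LLVM_fragment, 0, 32}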
@ -1858,3 +1901,7 @@ bool MetadataLoader::isStrippingTBAA() { return Pimpl->isStrippingTBAA(); }
unsigned MetadataLoader::size() const { return Pimpl->size(); }
void MetadataLoader::shrinkTo(unsigned N) { return Pimpl->shrinkTo(N); }
void MetadataLoader::upgradeDebugIntrinsics(Function &F) {
return Pimpl->upgradeDebugIntrinsics(F);
}

View File

@ -79,6 +79,9 @@ public:
unsigned size() const;
void shrinkTo(unsigned N);
/// Perform bitcode upgrades on llvm.dbg.* calls.
void upgradeDebugIntrinsics(Function &F);
};
}

View File

@ -28,6 +28,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/UseListOrder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Program.h"
@ -76,26 +77,28 @@ protected:
/// The stream created and owned by the client.
BitstreamWriter &Stream;
/// Saves the offset of the VSTOffset record that must eventually be
/// backpatched with the offset of the actual VST.
uint64_t VSTOffsetPlaceholder = 0;
public:
/// Constructs a BitcodeWriterBase object that writes to the provided
/// \p Stream.
BitcodeWriterBase(BitstreamWriter &Stream) : Stream(Stream) {}
protected:
bool hasVSTOffsetPlaceholder() { return VSTOffsetPlaceholder != 0; }
void writeValueSymbolTableForwardDecl();
void writeBitcodeHeader();
void writeModuleVersion();
};
void BitcodeWriterBase::writeModuleVersion() {
// VERSION: [version#]
Stream.EmitRecord(bitc::MODULE_CODE_VERSION, ArrayRef<uint64_t>{2});
}
/// Class to manage the bitcode writing for a module.
class ModuleBitcodeWriter : public BitcodeWriterBase {
/// Pointer to the buffer allocated by caller for bitcode writing.
const SmallVectorImpl<char> &Buffer;
StringTableBuilder &StrtabBuilder;
/// The Module to write to bitcode.
const Module &M;
@ -127,15 +130,20 @@ class ModuleBitcodeWriter : public BitcodeWriterBase {
/// Tracks the last value id recorded in the GUIDToValueMap.
unsigned GlobalValueId;
/// Saves the offset of the VSTOffset record that must eventually be
/// backpatched with the offset of the actual VST.
uint64_t VSTOffsetPlaceholder = 0;
public:
/// Constructs a ModuleBitcodeWriter object for the given Module,
/// writing to the provided \p Buffer.
ModuleBitcodeWriter(const Module *M, SmallVectorImpl<char> &Buffer,
StringTableBuilder &StrtabBuilder,
BitstreamWriter &Stream, bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index, bool GenerateHash,
ModuleHash *ModHash = nullptr)
: BitcodeWriterBase(Stream), Buffer(Buffer), M(*M),
VE(*M, ShouldPreserveUseListOrder), Index(Index),
: BitcodeWriterBase(Stream), Buffer(Buffer), StrtabBuilder(StrtabBuilder),
M(*M), VE(*M, ShouldPreserveUseListOrder), Index(Index),
GenerateHash(GenerateHash), ModHash(ModHash),
BitcodeStartBit(Stream.GetCurrentBitNo()) {
// Assign ValueIds to any callee values in the index that came from
@ -169,6 +177,7 @@ private:
void writeAttributeTable();
void writeTypeTable();
void writeComdats();
void writeValueSymbolTableForwardDecl();
void writeModuleInfo();
void writeValueAsMetadata(const ValueAsMetadata *MD,
SmallVectorImpl<uint64_t> &Record);
@ -261,9 +270,9 @@ private:
SmallVectorImpl<uint64_t> &Vals);
void writeInstruction(const Instruction &I, unsigned InstID,
SmallVectorImpl<unsigned> &Vals);
void writeValueSymbolTable(
const ValueSymbolTable &VST, bool IsModuleLevel = false,
DenseMap<const Function *, uint64_t> *FunctionToBitcodeIndex = nullptr);
void writeFunctionLevelValueSymbolTable(const ValueSymbolTable &VST);
void writeGlobalValueSymbolTable(
DenseMap<const Function *, uint64_t> &FunctionToBitcodeIndex);
void writeUseList(UseListOrder &&Order);
void writeUseListBlock(const Function *F);
void
@ -477,7 +486,6 @@ public:
private:
void writeModStrings();
void writeCombinedValueSymbolTable();
void writeCombinedGlobalValueSummary();
/// Indicates whether the provided \p ModulePath should be written into
@ -492,15 +500,15 @@ private:
const auto &VMI = GUIDToValueIdMap.find(ValGUID);
return VMI != GUIDToValueIdMap.end();
}
void assignValueId(GlobalValue::GUID ValGUID) {
unsigned &ValueId = GUIDToValueIdMap[ValGUID];
if (ValueId == 0)
ValueId = ++GlobalValueId;
}
unsigned getValueId(GlobalValue::GUID ValGUID) {
const auto &VMI = GUIDToValueIdMap.find(ValGUID);
// If this GUID doesn't have an entry, assign one.
if (VMI == GUIDToValueIdMap.end()) {
GUIDToValueIdMap[ValGUID] = ++GlobalValueId;
return GlobalValueId;
} else {
return VMI->second;
}
auto VMI = GUIDToValueIdMap.find(ValGUID);
assert(VMI != GUIDToValueIdMap.end());
return VMI->second;
}
std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; }
};
@ -1047,13 +1055,10 @@ static unsigned getEncodedUnnamedAddr(const GlobalValue &GV) {
void ModuleBitcodeWriter::writeComdats() {
SmallVector<unsigned, 64> Vals;
for (const Comdat *C : VE.getComdats()) {
// COMDAT: [selection_kind, name]
// COMDAT: [strtab offset, strtab size, selection_kind]
Vals.push_back(StrtabBuilder.add(C->getName()));
Vals.push_back(C->getName().size());
Vals.push_back(getEncodedComdatSelectionKind(*C));
size_t Size = C->getName().size();
assert(isUInt<32>(Size));
Vals.push_back(Size);
for (char Chr : C->getName())
Vals.push_back((unsigned char)Chr);
Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0);
Vals.clear();
}
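// Illustrative record (made-up offset): a comdat named "foo" whose name
// landed at offset 42 of the string table is emitted as
//   [MODULE_CODE_COMDAT, /*strtab offset=*/42, /*strtab size=*/3,
//    /*selection kind=*/COMDAT_SELECTION_KIND_ANY]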
@ -1062,7 +1067,7 @@ void ModuleBitcodeWriter::writeComdats() {
/// Write a record that will eventually hold the word offset of the
/// module-level VST. For now the offset is 0, which will be backpatched
/// after the real VST is written. Saves the bit offset to backpatch.
void BitcodeWriterBase::writeValueSymbolTableForwardDecl() {
void ModuleBitcodeWriter::writeValueSymbolTableForwardDecl() {
// Write a placeholder value in for the offset of the real VST,
// which is written after the function blocks so that it can include
// the offset of each function. The placeholder offset will be
@ -1165,6 +1170,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Add an abbrev for common globals with no visibility or thread localness.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(MaxGlobalType+1)));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddrSpace << 2
@ -1188,15 +1195,42 @@ void ModuleBitcodeWriter::writeModuleInfo() {
SimpleGVarAbbrev = Stream.EmitAbbrev(std::move(Abbv));
}
// Emit the global variable information.
SmallVector<unsigned, 64> Vals;
// Emit the module's source file name.
{
StringEncoding Bits = getStringEncoding(M.getSourceFileName().data(),
M.getSourceFileName().size());
BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
if (Bits == SE_Char6)
AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
else if (Bits == SE_Fixed7)
AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);
// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(AbbrevOpToUse);
unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
for (const auto P : M.getSourceFileName())
Vals.push_back((unsigned char)P);
// Emit the finished record.
Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev);
Vals.clear();
}
// Emit the global variable information.
for (const GlobalVariable &GV : M.globals()) {
unsigned AbbrevToUse = 0;
// GLOBALVAR: [type, isconst, initid,
// GLOBALVAR: [strtab offset, strtab size, type, isconst, initid,
// linkage, alignment, section, visibility, threadlocal,
// unnamed_addr, externally_initialized, dllstorageclass,
// comdat]
Vals.push_back(StrtabBuilder.add(GV.getName()));
Vals.push_back(GV.getName().size());
Vals.push_back(VE.getTypeID(GV.getValueType()));
Vals.push_back(GV.getType()->getAddressSpace() << 2 | 2 | GV.isConstant());
Vals.push_back(GV.isDeclaration() ? 0 :
@ -1226,9 +1260,12 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Emit the function proto information.
for (const Function &F : M) {
// FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
// section, visibility, gc, unnamed_addr, prologuedata,
// dllstorageclass, comdat, prefixdata, personalityfn]
// FUNCTION: [strtab offset, strtab size, type, callingconv, isproto,
// linkage, paramattrs, alignment, section, visibility, gc,
// unnamed_addr, prologuedata, dllstorageclass, comdat,
// prefixdata, personalityfn]
Vals.push_back(StrtabBuilder.add(F.getName()));
Vals.push_back(F.getName().size());
Vals.push_back(VE.getTypeID(F.getFunctionType()));
Vals.push_back(F.getCallingConv());
Vals.push_back(F.isDeclaration());
@ -1255,8 +1292,10 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Emit the alias information.
for (const GlobalAlias &A : M.aliases()) {
// ALIAS: [alias type, aliasee val#, linkage, visibility, dllstorageclass,
// threadlocal, unnamed_addr]
// ALIAS: [strtab offset, strtab size, alias type, aliasee val#, linkage,
// visibility, dllstorageclass, threadlocal, unnamed_addr]
Vals.push_back(StrtabBuilder.add(A.getName()));
Vals.push_back(A.getName().size());
Vals.push_back(VE.getTypeID(A.getValueType()));
Vals.push_back(A.getType()->getAddressSpace());
Vals.push_back(VE.getValueID(A.getAliasee()));
@ -1272,7 +1311,10 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Emit the ifunc information.
for (const GlobalIFunc &I : M.ifuncs()) {
// IFUNC: [ifunc type, address space, resolver val#, linkage, visibility]
// IFUNC: [strtab offset, strtab size, ifunc type, address space, resolver
// val#, linkage, visibility]
Vals.push_back(StrtabBuilder.add(I.getName()));
Vals.push_back(I.getName().size());
Vals.push_back(VE.getTypeID(I.getValueType()));
Vals.push_back(I.getType()->getAddressSpace());
Vals.push_back(VE.getValueID(I.getResolver()));
@ -1282,34 +1324,6 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Vals.clear();
}
// Emit the module's source file name.
{
StringEncoding Bits = getStringEncoding(M.getSourceFileName().data(),
M.getSourceFileName().size());
BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
if (Bits == SE_Char6)
AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
else if (Bits == SE_Fixed7)
AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);
// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(AbbrevOpToUse);
unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
for (const auto P : M.getSourceFileName())
Vals.push_back((unsigned char)P);
// Emit the finished record.
Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev);
Vals.clear();
}
// If we have a VST, write the VSTOFFSET record placeholder.
if (M.getValueSymbolTable().empty())
return;
writeValueSymbolTableForwardDecl();
}
@ -1757,9 +1771,8 @@ void ModuleBitcodeWriter::writeDIExpression(const DIExpression *N,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
Record.reserve(N->getElements().size() + 1);
const uint64_t HasOpFragmentFlag = 1 << 1;
Record.push_back((uint64_t)N->isDistinct() | HasOpFragmentFlag);
const uint64_t Version = 2 << 1;
Record.push_back((uint64_t)N->isDistinct() | Version);
Record.append(N->elements_begin(), N->elements_end());
Stream.EmitRecord(bitc::METADATA_EXPRESSION, Record, Abbrev);
@ -2839,78 +2852,60 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Vals.clear();
}
/// Emit names for globals/functions etc. \p IsModuleLevel is true when
/// we are writing the module-level VST, where we are including a function
/// bitcode index and need to backpatch the VST forward declaration record.
void ModuleBitcodeWriter::writeValueSymbolTable(
const ValueSymbolTable &VST, bool IsModuleLevel,
DenseMap<const Function *, uint64_t> *FunctionToBitcodeIndex) {
if (VST.empty()) {
// writeValueSymbolTableForwardDecl should have returned early as
// well. Ensure this handling remains in sync by asserting that
// the placeholder offset is not set.
assert(!IsModuleLevel || !hasVSTOffsetPlaceholder());
return;
}
if (IsModuleLevel && hasVSTOffsetPlaceholder()) {
// Get the offset of the VST we are writing, and backpatch it into
// the VST forward declaration record.
uint64_t VSTOffset = Stream.GetCurrentBitNo();
// The BitcodeStartBit was the stream offset of the identification block.
VSTOffset -= bitcodeStartBit();
assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned");
// Note that we add 1 here because the offset is relative to one word
// before the start of the identification block, which was historically
// always the start of the regular bitcode header.
Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1);
}
/// Write a GlobalValue VST to the module. The purpose of this data structure is
/// to allow clients to efficiently find the function body.
void ModuleBitcodeWriter::writeGlobalValueSymbolTable(
DenseMap<const Function *, uint64_t> &FunctionToBitcodeIndex) {
// Get the offset of the VST we are writing, and backpatch it into
// the VST forward declaration record.
uint64_t VSTOffset = Stream.GetCurrentBitNo();
// The BitcodeStartBit was the stream offset of the identification block.
VSTOffset -= bitcodeStartBit();
assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned");
// Note that we add 1 here because the offset is relative to one word
// before the start of the identification block, which was historically
// always the start of the regular bitcode header.
Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1);
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
// For the module-level VST, add abbrev Ids for the VST_CODE_FNENTRY
// records, which are not used in the per-function VSTs.
unsigned FnEntry8BitAbbrev;
unsigned FnEntry7BitAbbrev;
unsigned FnEntry6BitAbbrev;
unsigned GUIDEntryAbbrev;
if (IsModuleLevel && hasVSTOffsetPlaceholder()) {
// 8-bit fixed-width VST_CODE_FNENTRY function strings.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
FnEntry8BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
unsigned FnEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// 7-bit fixed width VST_CODE_FNENTRY function strings.
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
FnEntry7BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
for (const Function &F : M) {
uint64_t Record[2];
// 6-bit char6 VST_CODE_FNENTRY function strings.
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
FnEntry6BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
if (F.isDeclaration())
continue;
// FIXME: Change the name of this record as it is now used by
// the per-module index as well.
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
GUIDEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv));
Record[0] = VE.getValueID(&F);
// Save the word offset of the function (from the start of the
// actual bitcode written to the stream).
uint64_t BitcodeIndex = FunctionToBitcodeIndex[&F] - bitcodeStartBit();
assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned");
// Note that we add 1 here because the offset is relative to one word
// before the start of the identification block, which was historically
// always the start of the regular bitcode header.
Record[1] = BitcodeIndex / 32 + 1;
Stream.EmitRecord(bitc::VST_CODE_FNENTRY, Record, FnEntryAbbrev);
}
Stream.ExitBlock();
}
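// Worked example (illustrative numbers): a function block starting at bit
// 4128 of the stream, with bitcodeStartBit() == 64, produces
//   BitcodeIndex = 4128 - 64 = 4064 bits
//   Record[1]    = 4064 / 32 + 1 = 128
// so a reader can seek directly to word 128 to materialize that body.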
/// Emit names for arguments, instructions and basic blocks in a function.
void ModuleBitcodeWriter::writeFunctionLevelValueSymbolTable(
const ValueSymbolTable &VST) {
if (VST.empty())
return;
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
// FIXME: Set up the abbrev, we know how many values there are!
// FIXME: We know if the type names can use 7-bit ascii.
SmallVector<uint64_t, 64> NameVals;
@ -2923,38 +2918,13 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
NameVals.push_back(VE.getValueID(Name.getValue()));
Function *F = dyn_cast<Function>(Name.getValue());
// VST_CODE_ENTRY: [valueid, namechar x N]
// VST_CODE_FNENTRY: [valueid, funcoffset, namechar x N]
// VST_CODE_BBENTRY: [bbid, namechar x N]
unsigned Code;
if (isa<BasicBlock>(Name.getValue())) {
Code = bitc::VST_CODE_BBENTRY;
if (Bits == SE_Char6)
AbbrevToUse = VST_BBENTRY_6_ABBREV;
} else if (F && !F->isDeclaration()) {
// Must be the module-level VST, where we pass in the Index and
// have a VSTOffsetPlaceholder. The function-level VST should not
// contain any Function symbols.
assert(FunctionToBitcodeIndex);
assert(hasVSTOffsetPlaceholder());
// Save the word offset of the function (from the start of the
// actual bitcode written to the stream).
uint64_t BitcodeIndex = (*FunctionToBitcodeIndex)[F] - bitcodeStartBit();
assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned");
// Note that we add 1 here because the offset is relative to one word
// before the start of the identification block, which was historically
// always the start of the regular bitcode header.
NameVals.push_back(BitcodeIndex / 32 + 1);
Code = bitc::VST_CODE_FNENTRY;
AbbrevToUse = FnEntry8BitAbbrev;
if (Bits == SE_Char6)
AbbrevToUse = FnEntry6BitAbbrev;
else if (Bits == SE_Fixed7)
AbbrevToUse = FnEntry7BitAbbrev;
} else {
Code = bitc::VST_CODE_ENTRY;
if (Bits == SE_Char6)
@ -2970,47 +2940,7 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
Stream.EmitRecord(Code, NameVals, AbbrevToUse);
NameVals.clear();
}
// Emit any GUID valueIDs created for indirect call edges into the
// module-level VST.
if (IsModuleLevel && hasVSTOffsetPlaceholder())
for (const auto &GI : valueIds()) {
NameVals.push_back(GI.second);
NameVals.push_back(GI.first);
Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals,
GUIDEntryAbbrev);
NameVals.clear();
}
Stream.ExitBlock();
}
/// Emit function names and summary offsets for the combined index
/// used by ThinLTO.
void IndexBitcodeWriter::writeCombinedValueSymbolTable() {
assert(hasVSTOffsetPlaceholder() && "Expected non-zero VSTOffsetPlaceholder");
// Get the offset of the VST we are writing, and backpatch it into
// the VST forward declaration record.
uint64_t VSTOffset = Stream.GetCurrentBitNo();
assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned");
Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32);
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
unsigned EntryAbbrev = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<uint64_t, 64> NameVals;
for (const auto &GVI : valueIds()) {
// VST_CODE_COMBINED_ENTRY: [valueid, refguid]
NameVals.push_back(GVI.second);
NameVals.push_back(GVI.first);
// Emit the finished record.
Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals, EntryAbbrev);
NameVals.clear();
}
Stream.ExitBlock();
}
@ -3114,7 +3044,7 @@ void ModuleBitcodeWriter::writeFunction(
// Emit names for all the instructions etc.
if (auto *Symtab = F.getValueSymbolTable())
writeValueSymbolTable(*Symtab);
writeFunctionLevelValueSymbolTable(*Symtab);
if (NeedsMetadataAttachment)
writeFunctionMetadataAttachment(F);
@ -3502,6 +3432,11 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
return;
}
for (const auto &GVI : valueIds()) {
Stream.EmitRecord(bitc::FS_VALUE_GUID,
ArrayRef<uint64_t>{GVI.second, GVI.first});
}
// Abbrev for FS_PERMODULE.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE));
@ -3594,6 +3529,39 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3);
Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION});
// Create value IDs for undefined references.
for (const auto &I : *this) {
if (auto *VS = dyn_cast<GlobalVarSummary>(I.second)) {
for (auto &RI : VS->refs())
assignValueId(RI.getGUID());
continue;
}
auto *FS = dyn_cast<FunctionSummary>(I.second);
if (!FS)
continue;
for (auto &RI : FS->refs())
assignValueId(RI.getGUID());
for (auto &EI : FS->calls()) {
GlobalValue::GUID GUID = EI.first.getGUID();
if (!hasValueId(GUID)) {
// For SamplePGO, the indirect call targets of local functions will
// have their original names annotated in the profile. We try to find the
// corresponding PGOFuncName as the GUID.
GUID = Index.getGUIDFromOriginalID(GUID);
if (GUID == 0 || !hasValueId(GUID))
continue;
}
assignValueId(GUID);
}
}
for (const auto &GVI : valueIds()) {
Stream.EmitRecord(bitc::FS_VALUE_GUID,
ArrayRef<uint64_t>{GVI.second, GVI.first});
}
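// Each FS_VALUE_GUID record pairs a summary-local value id with its GUID,
// e.g. [FS_VALUE_GUID, /*valueid=*/7, /*refguid=*/0x123456789abcdef0],
// taking over the role the VST_CODE_COMBINED_ENTRY records used to play in
// the combined value symbol table.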
// Abbrev for FS_COMBINED.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED));
@ -3808,10 +3776,7 @@ void ModuleBitcodeWriter::write() {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
size_t BlockStartPos = Buffer.size();
SmallVector<unsigned, 1> Vals;
unsigned CurVersion = 1;
Vals.push_back(CurVersion);
Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
writeModuleVersion();
// Emit blockinfo, which defines the standard abbreviations etc.
writeBlockInfo();
@ -3857,8 +3822,7 @@ void ModuleBitcodeWriter::write() {
if (Index)
writePerModuleGlobalValueSummary();
writeValueSymbolTable(M.getValueSymbolTable(),
/* IsModuleLevel */ true, &FunctionToBitcodeIndex);
writeGlobalValueSymbolTable(FunctionToBitcodeIndex);
writeModuleHash(BlockStartPos);
@ -3946,13 +3910,45 @@ BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer)
writeBitcodeHeader(*Stream);
}
BitcodeWriter::~BitcodeWriter() = default;
BitcodeWriter::~BitcodeWriter() { assert(WroteStrtab); }
void BitcodeWriter::writeBlob(unsigned Block, unsigned Record, StringRef Blob) {
Stream->EnterSubblock(Block, 3);
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(Record));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
auto AbbrevNo = Stream->EmitAbbrev(std::move(Abbv));
Stream->EmitRecordWithBlob(AbbrevNo, ArrayRef<uint64_t>{Record}, Blob);
Stream->ExitBlock();
}
void BitcodeWriter::writeStrtab() {
assert(!WroteStrtab);
std::vector<char> Strtab;
StrtabBuilder.finalizeInOrder();
Strtab.resize(StrtabBuilder.getSize());
StrtabBuilder.write((uint8_t *)Strtab.data());
writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB,
{Strtab.data(), Strtab.size()});
WroteStrtab = true;
}
void BitcodeWriter::copyStrtab(StringRef Strtab) {
writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, Strtab);
WroteStrtab = true;
}
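// Typical usage (minimal sketch mirroring WriteBitcodeToFile below; Buffer
// and M are assumed to exist): modules are written first, then the string
// table exactly once:
//
//   BitcodeWriter Writer(Buffer);
//   Writer.writeModule(M, /*ShouldPreserveUseListOrder=*/false,
//                      /*Index=*/nullptr, /*GenerateHash=*/false);
//   Writer.writeStrtab(); // mandatory; the destructor asserts WroteStrtab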
void BitcodeWriter::writeModule(const Module *M,
bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index,
bool GenerateHash, ModuleHash *ModHash) {
ModuleBitcodeWriter ModuleWriter(M, Buffer, *Stream,
ModuleBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream,
ShouldPreserveUseListOrder, Index,
GenerateHash, ModHash);
ModuleWriter.write();
@ -3976,6 +3972,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
BitcodeWriter Writer(Buffer);
Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash,
ModHash);
Writer.writeStrtab();
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
emitDarwinBCHeaderAndTrailer(Buffer, TT);
@ -3987,13 +3984,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
void IndexBitcodeWriter::write() {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
SmallVector<unsigned, 1> Vals;
unsigned CurVersion = 1;
Vals.push_back(CurVersion);
Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
// If we have a VST, write the VSTOFFSET record placeholder.
writeValueSymbolTableForwardDecl();
writeModuleVersion();
// Write the module paths in the combined index.
writeModStrings();
@ -4001,10 +3992,6 @@ void IndexBitcodeWriter::write() {
// Write the summary combined index records.
writeCombinedGlobalValueSummary();
// Need a special VST writer for the combined index (we don't have a
// real VST and real values when this is invoked).
writeCombinedValueSymbolTable();
Stream.ExitBlock();
}

View File

@ -19,4 +19,4 @@
type = Library
name = BitWriter
parent = Bitcode
required_libraries = Analysis Core Support
required_libraries = Analysis Core MC Support

View File

@ -834,9 +834,9 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << " <- ";
// The second operand is only an offset if it's an immediate.
bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0;
bool Deref = false;
bool MemLoc = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
int64_t Offset = MemLoc ? MI->getOperand(1).getImm() : 0;
for (unsigned i = 0; i < Expr->getNumElements(); ++i) {
uint64_t Op = Expr->getElement(i);
if (Op == dwarf::DW_OP_LLVM_fragment) {
@ -844,7 +844,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
break;
} else if (Deref) {
// We currently don't support extra Offsets or derefs after the first
// one. Bail out early instead of emitting an incorrect comment
// one. Bail out early instead of emitting an incorrect comment.
OS << " [complex expression]";
AP.OutStreamer->emitRawComment(OS.str());
return true;
@ -899,12 +899,12 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
AP.OutStreamer->emitRawComment(OS.str());
return true;
}
if (Deref)
if (MemLoc || Deref)
OS << '[';
OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
}
if (Deref)
if (MemLoc || Deref)
OS << '+' << Offset << ']';
// NOTE: Want this comment at start of line, don't emit with AddComment.
@ -1356,7 +1356,7 @@ bool AsmPrinter::doFinalization(Module &M) {
OutContext.getOrCreateSymbol(StringRef("__morestack_addr"));
OutStreamer->EmitLabel(AddrSymbol);
unsigned PtrSize = M.getDataLayout().getPointerSize(0);
unsigned PtrSize = MAI->getCodePointerSize();
OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"),
PtrSize);
}
@ -2246,7 +2246,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN]
ExtraBits = Realigned.getRawData()[0] &
(((uint64_t)-1) >> (64 - ExtraBitsSize));
Realigned = Realigned.lshr(ExtraBitsSize);
Realigned.lshrInPlace(ExtraBitsSize);
} else
ExtraBits = Realigned.getRawData()[BitWidth / 64];
}
@ -2781,7 +2781,7 @@ void AsmPrinter::emitXRayTable() {
// before the function's end, we assume that this is happening after
// the last return instruction.
auto WordSizeBytes = TM.getPointerSize();
auto WordSizeBytes = MAI->getCodePointerSize();
MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
OutStreamer->EmitCodeAlignment(16);
OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false);

View File

@ -1136,7 +1136,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
DITypeRef ElementTypeRef = Ty->getBaseType();
TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef);
// IndexType is size_t, which depends on the bitness of the target.
TypeIndex IndexType = Asm->MAI->getPointerSize() == 8
TypeIndex IndexType = Asm->TM.getPointerSize() == 8
? TypeIndex(SimpleTypeKind::UInt64Quad)
: TypeIndex(SimpleTypeKind::UInt32Long);
@ -1342,8 +1342,8 @@ TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) {
assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type);
TypeIndex ClassTI = getTypeIndex(Ty->getClassType());
TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType());
PointerKind PK = Asm->MAI->getPointerSize() == 8 ? PointerKind::Near64
: PointerKind::Near32;
PointerKind PK = Asm->TM.getPointerSize() == 8 ? PointerKind::Near64
: PointerKind::Near32;
bool IsPMF = isa<DISubroutineType>(Ty->getBaseType());
PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction
: PointerMode::PointerToDataMember;
@ -1458,7 +1458,8 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
}
TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) {
unsigned VSlotCount = Ty->getSizeInBits() / (8 * Asm->MAI->getPointerSize());
unsigned VSlotCount =
Ty->getSizeInBits() / (8 * Asm->MAI->getCodePointerSize());
SmallVector<VFTableSlotKind, 4> Slots(VSlotCount, VFTableSlotKind::Near);
VFTableShapeRecord VFTSR(Slots);

View File

@ -31,6 +31,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
//===----------------------------------------------------------------------===//
// DIEAbbrevData Implementation
//===----------------------------------------------------------------------===//
@ -79,15 +81,22 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
dwarf::AttributeString(AttrData.getAttribute()).data());
// Emit form type.
#ifndef NDEBUG
// Could be an assertion, but this way we can see the failing form code
// easily, which helps track down where it came from.
if (!dwarf::isValidFormForVersion(AttrData.getForm(),
AP->getDwarfVersion())) {
DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm())
<< " for DWARF version " << AP->getDwarfVersion() << "\n");
llvm_unreachable("Invalid form for specified DWARF version");
}
#endif
AP->EmitULEB128(AttrData.getForm(),
dwarf::FormEncodingString(AttrData.getForm()).data());
// Emit value for DW_FORM_implicit_const.
if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) {
assert(AP->getDwarfVersion() >= 5 &&
"DW_FORM_implicit_const is supported starting from DWARFv5");
if (AttrData.getForm() == dwarf::DW_FORM_implicit_const)
AP->EmitSLEB128(AttrData.getValue());
}
}
// Mark end of abbreviation.
@ -518,7 +527,7 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
return AP->getPointerSize();
return AP->MAI->getCodePointerSize();
}
LLVM_DUMP_METHOD
@ -540,7 +549,7 @@ unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
return AP->getPointerSize();
return AP->MAI->getCodePointerSize();
}
LLVM_DUMP_METHOD
@ -682,7 +691,7 @@ unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return getULEB128Size(Entry->getOffset());
case dwarf::DW_FORM_ref_addr:
if (AP->getDwarfVersion() == 2)
return AP->getPointerSize();
return AP->MAI->getCodePointerSize();
switch (AP->OutStreamer->getContext().getDwarfFormat()) {
case dwarf::DWARF32:
return 4;
@ -808,7 +817,7 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return 4;
if (Form == dwarf::DW_FORM_sec_offset)
return 4;
return AP->getPointerSize();
return AP->MAI->getCodePointerSize();
}
/// EmitValue - Emit label value.

View File

@ -547,18 +547,19 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
for (auto &Fragment : DV.getFrameIndexExprs()) {
unsigned FrameReg = 0;
const DIExpression *Expr = Fragment.Expr;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
DwarfExpr.addFragmentOffset(Fragment.Expr);
DwarfExpr.addFragmentOffset(Expr);
SmallVector<uint64_t, 8> Ops;
Ops.push_back(dwarf::DW_OP_plus);
Ops.push_back(Offset);
Ops.push_back(dwarf::DW_OP_deref);
Ops.append(Fragment.Expr->elements_begin(), Fragment.Expr->elements_end());
DIExpressionCursor Expr(Ops);
Ops.append(Expr->elements_begin(), Expr->elements_end());
DIExpressionCursor Cursor(Ops);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addMachineRegExpression(
*Asm->MF->getSubtarget().getRegisterInfo(), Expr, FrameReg);
DwarfExpr.addExpression(std::move(Expr));
*Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);
DwarfExpr.addExpression(std::move(Cursor));
}
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
@ -779,12 +780,13 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
SmallVector<uint64_t, 8> Ops;
if (Location.isIndirect()) {
if (Location.isIndirect() && Location.getOffset()) {
Ops.push_back(dwarf::DW_OP_plus);
Ops.push_back(Location.getOffset());
Ops.push_back(dwarf::DW_OP_deref);
}
DIExpressionCursor Cursor(Ops);
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
@ -807,12 +809,13 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
const DIExpression *DIExpr = DV.getSingleExpression();
DwarfExpr.addFragmentOffset(DIExpr);
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
SmallVector<uint64_t, 8> Ops;
if (Location.isIndirect()) {
if (Location.isIndirect() && Location.getOffset()) {
Ops.push_back(dwarf::DW_OP_plus);
Ops.push_back(Location.getOffset());
Ops.push_back(dwarf::DW_OP_deref);
}
Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
DIExpressionCursor Cursor(Ops);

View File

@ -1517,13 +1517,12 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
DwarfExpr.addUnsignedConstant(Value.getInt());
} else if (Value.isLocation()) {
MachineLocation Location = Value.getLoc();
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
SmallVector<uint64_t, 8> Ops;
// FIXME: Should this condition be Location.isIndirect() instead?
if (Location.getOffset()) {
if (Location.isIndirect() && Location.getOffset()) {
Ops.push_back(dwarf::DW_OP_plus);
Ops.push_back(Location.getOffset());
Ops.push_back(dwarf::DW_OP_deref);
}
Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
DIExpressionCursor Cursor(Ops);
@ -1578,7 +1577,7 @@ void DwarfDebug::emitDebugLoc() {
// Start the dwarf loc section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
unsigned char Size = Asm->getDataLayout().getPointerSize();
unsigned char Size = Asm->MAI->getCodePointerSize();
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->EmitLabel(List.Label);
const DwarfCompileUnit *CU = List.CU;
@ -1708,7 +1707,7 @@ void DwarfDebug::emitDebugARanges() {
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfARangesSection());
unsigned PtrSize = Asm->getDataLayout().getPointerSize();
unsigned PtrSize = Asm->MAI->getCodePointerSize();
// Build a list of CUs used.
std::vector<DwarfCompileUnit *> CUs;
@ -1791,7 +1790,7 @@ void DwarfDebug::emitDebugRanges() {
Asm->getObjFileLowering().getDwarfRangesSection());
// Size for our labels.
unsigned char Size = Asm->getDataLayout().getPointerSize();
unsigned char Size = Asm->MAI->getCodePointerSize();
// Grab the specific ranges for the compile units in the module.
for (const auto &I : CUMap) {

View File

@ -23,9 +23,12 @@
using namespace llvm;
void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
if (DwarfReg < 32) {
emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
assert((LocationKind == Unknown || LocationKind == Register) &&
"location description already locked down");
LocationKind = Register;
if (DwarfReg < 32) {
emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
} else {
emitOp(dwarf::DW_OP_regx, Comment);
emitUnsigned(DwarfReg);
@ -34,6 +37,7 @@ void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
void DwarfExpression::addBReg(int DwarfReg, int Offset) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
assert(LocationKind != Register && "location description already locked down");
if (DwarfReg < 32) {
emitOp(dwarf::DW_OP_breg0 + DwarfReg);
} else {
@ -156,18 +160,23 @@ void DwarfExpression::addStackValue() {
}
void DwarfExpression::addSignedConstant(int64_t Value) {
assert(LocationKind == Implicit || LocationKind == Unknown);
LocationKind = Implicit;
emitOp(dwarf::DW_OP_consts);
emitSigned(Value);
addStackValue();
}
void DwarfExpression::addUnsignedConstant(uint64_t Value) {
assert(LocationKind == Implicit || LocationKind == Unknown);
LocationKind = Implicit;
emitOp(dwarf::DW_OP_constu);
emitUnsigned(Value);
addStackValue();
}
void DwarfExpression::addUnsignedConstant(const APInt &Value) {
assert(LocationKind == Implicit || LocationKind == Unknown);
LocationKind = Implicit;
unsigned Size = Value.getBitWidth();
const uint64_t *Data = Value.getRawData();
@ -178,7 +187,8 @@ void DwarfExpression::addUnsignedConstant(const APInt &Value) {
addUnsignedConstant(*Data++);
if (Offset == 0 && Size <= 64)
break;
addOpPiece(std::min(Size-Offset, 64u), Offset);
addStackValue();
addOpPiece(std::min(Size - Offset, 64u), Offset);
Offset += 64;
}
}
@ -206,7 +216,7 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
}
// Handle simple register locations.
if (!HasComplexExpression) {
if (LocationKind != Memory && !HasComplexExpression) {
for (auto &Reg : DwarfRegs) {
if (Reg.DwarfRegNo >= 0)
addReg(Reg.DwarfRegNo, Reg.Comment);
@ -216,62 +226,65 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
return true;
}
// Don't emit locations that cannot be expressed without DW_OP_stack_value.
if (DwarfVersion < 4)
if (std::any_of(ExprCursor.begin(), ExprCursor.end(),
[](DIExpression::ExprOperand Op) -> bool {
return Op.getOp() == dwarf::DW_OP_stack_value;
})) {
DwarfRegs.clear();
return false;
}
assert(DwarfRegs.size() == 1);
auto Reg = DwarfRegs[0];
bool FBReg = isFrameRegister(TRI, MachineReg);
bool FBReg = isFrameRegister(TRI, MachineReg);
int SignedOffset = 0;
assert(Reg.Size == 0 && "subregister has same size as superregister");
// Pattern-match combinations for which more efficient representations exist.
switch (Op->getOp()) {
default: {
if (FBReg)
addFBReg(0);
else
addReg(Reg.DwarfRegNo, 0);
break;
}
case dwarf::DW_OP_plus:
case dwarf::DW_OP_minus: {
// [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset].
// [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset].
auto N = ExprCursor.peekNext();
if (N && N->getOp() == dwarf::DW_OP_deref) {
int Offset = Op->getArg(0);
int SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset;
if (FBReg)
addFBReg(SignedOffset);
else
addBReg(Reg.DwarfRegNo, SignedOffset);
ExprCursor.consume(2);
break;
}
addReg(Reg.DwarfRegNo, 0);
break;
}
case dwarf::DW_OP_deref:
// [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
if (FBReg)
addFBReg(0);
else
addBReg(Reg.DwarfRegNo, 0);
// [Reg, Offset, DW_OP_plus] --> [DW_OP_breg, Offset].
// [Reg, Offset, DW_OP_minus] --> [DW_OP_breg, -Offset].
// If Reg is a subregister we need to mask it out before subtracting.
if (Op && ((Op->getOp() == dwarf::DW_OP_plus) ||
(Op->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) {
int Offset = Op->getArg(0);
SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset;
ExprCursor.take();
break;
}
if (FBReg)
addFBReg(SignedOffset);
else
addBReg(Reg.DwarfRegNo, SignedOffset);
DwarfRegs.clear();
return true;
}
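// Example (illustrative): for a memory location "frame register + 8" with
// the expression {DW_OP_plus, 8}, the fold above emits a single
// DW_OP_fbreg 8 (or DW_OP_breg<N> 8 for a non-frame register) instead of
// a register op followed by DW_OP_plus_uconst.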
/// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?".
static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
while (ExprCursor) {
auto Op = ExprCursor.take();
switch (Op->getOp()) {
case dwarf::DW_OP_deref:
case dwarf::DW_OP_LLVM_fragment:
break;
default:
return false;
}
}
return true;
}
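// Examples (illustrative) of what the matcher accepts:
//   {}                                        -> true (plain memory location)
//   {DW_OP_deref}                             -> true
//   {DW_OP_deref, DW_OP_LLVM_fragment, 0, 32} -> true
//   {DW_OP_plus, 8}                           -> false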
void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
unsigned FragmentOffsetInBits) {
// If we need to mask out a subregister, do it now, unless the next
// operation would emit an OpPiece anyway.
auto N = ExprCursor.peek();
if (SubRegisterSizeInBits && N && (N->getOp() != dwarf::DW_OP_LLVM_fragment))
maskSubRegister();
while (ExprCursor) {
auto Op = ExprCursor.take();
// If we need to mask out a subregister, do it now, unless the next
// operation would emit an OpPiece anyway.
if (SubRegisterSizeInBits && Op->getOp() != dwarf::DW_OP_LLVM_fragment)
maskSubRegister();
switch (Op->getOp()) {
case dwarf::DW_OP_LLVM_fragment: {
unsigned SizeInBits = Op->getArg(1);
@ -281,50 +294,74 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
// location.
assert(OffsetInBits >= FragmentOffset && "fragment offset not added?");
// If \a addMachineReg already emitted DW_OP_piece operations to represent
// If addMachineReg already emitted DW_OP_piece operations to represent
// a super-register by splicing together sub-registers, subtract the size
// of the pieces that was already emitted.
SizeInBits -= OffsetInBits - FragmentOffset;
// If \a addMachineReg requested a DW_OP_bit_piece to stencil out a
// If addMachineReg requested a DW_OP_bit_piece to stencil out a
// sub-register that is smaller than the current fragment's size, use it.
if (SubRegisterSizeInBits)
SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
// Emit a DW_OP_stack_value for implicit location descriptions.
if (LocationKind == Implicit)
addStackValue();
// Emit the DW_OP_piece.
addOpPiece(SizeInBits, SubRegisterOffsetInBits);
setSubRegisterPiece(0, 0);
break;
// Reset the location description kind.
LocationKind = Unknown;
return;
}
case dwarf::DW_OP_plus:
assert(LocationKind != Register);
emitOp(dwarf::DW_OP_plus_uconst);
emitUnsigned(Op->getArg(0));
break;
case dwarf::DW_OP_minus:
// There is no OP_minus_uconst.
assert(LocationKind != Register);
// There is no DW_OP_minus_uconst.
emitOp(dwarf::DW_OP_constu);
emitUnsigned(Op->getArg(0));
emitOp(dwarf::DW_OP_minus);
break;
case dwarf::DW_OP_deref:
emitOp(dwarf::DW_OP_deref);
case dwarf::DW_OP_deref: {
assert(LocationKind != Register);
if (LocationKind != Memory && isMemoryLocation(ExprCursor))
// Turning this into a memory location description makes the deref
// implicit.
LocationKind = Memory;
else
emitOp(dwarf::DW_OP_deref);
break;
}
case dwarf::DW_OP_constu:
assert(LocationKind != Register);
emitOp(dwarf::DW_OP_constu);
emitUnsigned(Op->getArg(0));
break;
case dwarf::DW_OP_stack_value:
addStackValue();
assert(LocationKind == Unknown || LocationKind == Implicit);
LocationKind = Implicit;
break;
case dwarf::DW_OP_swap:
assert(LocationKind != Register);
emitOp(dwarf::DW_OP_swap);
break;
case dwarf::DW_OP_xderef:
assert(LocationKind != Register);
emitOp(dwarf::DW_OP_xderef);
break;
default:
llvm_unreachable("unhandled opcode found in expression");
}
}
if (LocationKind == Implicit)
// Turn this into an implicit location description.
addStackValue();
}
/// add masking operations to stencil out a subregister.

View File

@ -72,6 +72,8 @@ public:
}
/// Determine whether there are any operations left in this expression.
operator bool() const { return Start != End; }
DIExpression::expr_op_iterator begin() const { return Start; }
DIExpression::expr_op_iterator end() const { return End; }
/// Retrieve the fragment information, if any.
Optional<DIExpression::FragmentInfo> getFragmentInfo() const {
@ -102,6 +104,9 @@ protected:
unsigned SubRegisterSizeInBits = 0;
unsigned SubRegisterOffsetInBits = 0;
/// The kind of location description being produced.
enum { Unknown = 0, Register, Memory, Implicit } LocationKind = Unknown;
/// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed
/// to represent a subregister.
void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) {
@ -122,7 +127,8 @@ protected:
/// current function.
virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
/// Emit a DW_OP_reg operation.
/// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF
/// register location description.
void addReg(int DwarfReg, const char *Comment = nullptr);
/// Emit a DW_OP_breg operation.
void addBReg(int DwarfReg, int Offset);
@ -185,11 +191,18 @@ public:
/// Emit an unsigned constant.
void addUnsignedConstant(const APInt &Value);
/// Lock this down to become a memory location description.
void setMemoryLocationKind() {
assert(LocationKind == Unknown);
LocationKind = Memory;
}
/// Emit a machine register location. As an optimization this may also consume
/// the prefix of a DwarfExpression if a more efficient representation for
/// combining the register location and the first operation exists.
///
/// \param FragmentOffsetInBits If this is one fragment out of a fragmented
/// \param FragmentOffsetInBits If this is one fragment out of a
/// fragmented
/// location, this is the offset of the
/// fragment inside the entire variable.
/// \return false if no DWARF register exists

@@ -27,6 +27,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
@@ -73,8 +74,8 @@ bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
: DIEUnit(A->getDwarfVersion(), A->getPointerSize(), UnitTag), CUNode(Node),
Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) {
: DIEUnit(A->getDwarfVersion(), A->MAI->getCodePointerSize(), UnitTag),
CUNode(Node), Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) {
}
DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
@@ -471,12 +472,13 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// variable's location.
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
SmallVector<uint64_t, 9> Ops;
if (Location.isIndirect()) {
if (Location.isIndirect() && Location.getOffset()) {
Ops.push_back(dwarf::DW_OP_plus);
Ops.push_back(Location.getOffset());
Ops.push_back(dwarf::DW_OP_deref);
}
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
@@ -1546,7 +1548,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
Asm->OutStreamer->AddComment("DWARF Unit Type");
Asm->EmitInt8(UT);
Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
Asm->EmitInt8(Asm->MAI->getCodePointerSize());
}
// We share one abbreviations table across all units so it's always at the
@@ -1562,7 +1564,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
if (Version <= 4) {
Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
Asm->EmitInt8(Asm->MAI->getCodePointerSize());
}
}
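
Both address-size changes swap Asm->getDataLayout().getPointerSize() for Asm->MAI->getCodePointerSize(): the DWARF unit header describes code addresses, and on targets where code and data pointers differ in width (AVR is the usual example) the data layout's pointer size is the wrong one. A one-line helper to make the intent explicit (our name, not part of the diff):

#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCAsmInfo.h"

// Hypothetical helper: DWARF "Address Size" should track code pointers,
// not the (possibly different) data pointer size.
static unsigned dwarfAddressSize(const llvm::AsmPrinter &AP) {
  return AP.MAI->getCodePointerSize();
}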

@@ -570,8 +570,14 @@ bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) {
ValueToValueMapTy VMap;
BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
for (BasicBlock *Pred : OtherPreds)
Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
for (BasicBlock *Pred : OtherPreds) {
// If the target is a loop to itself, then the terminator of the split
// block needs to be updated.
if (Pred == Target)
BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
else
Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
}
// Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
// they are clones, so the number of PHIs are the same.
@@ -5059,16 +5065,14 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
if (!ShlC)
return false;
uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt);
DemandBits |= ShlDemandBits;
DemandBits.setLowBits(BitWidth - ShiftAmt);
break;
}
case llvm::Instruction::Trunc: {
EVT TruncVT = TLI->getValueType(*DL, I->getType());
unsigned TruncBitWidth = TruncVT.getSizeInBits();
auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth);
DemandBits |= TruncBits;
DemandBits.setLowBits(TruncBitWidth);
break;
}
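
Both hunks replace the getAllOnesValue()-based mask construction with APInt::setLowBits, which sets the same bits in place without materializing temporaries. A self-contained check of the equivalence for the shift case (our example, not from the diff):

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  const unsigned BitWidth = 32, ShiftAmt = 12;
  // Old idiom: all-ones shifted right leaves the low (BitWidth - ShiftAmt)
  // bits set. New idiom: set exactly those low bits directly.
  llvm::APInt Old = llvm::APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt);
  llvm::APInt New(BitWidth, 0);
  New.setLowBits(BitWidth - ShiftAmt);
  assert(Old == New);
  return 0;
}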

@@ -381,18 +381,19 @@ bool IRTranslator::translateInsertValue(const User &U,
uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
unsigned Res = getOrCreateVReg(U);
const Value &Inserted = *U.getOperand(1);
MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), getOrCreateVReg(Inserted),
Offset);
unsigned Inserted = getOrCreateVReg(*U.getOperand(1));
MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), Inserted, Offset);
return true;
}
bool IRTranslator::translateSelect(const User &U,
MachineIRBuilder &MIRBuilder) {
MIRBuilder.buildSelect(getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)),
getOrCreateVReg(*U.getOperand(1)),
getOrCreateVReg(*U.getOperand(2)));
unsigned Res = getOrCreateVReg(U);
unsigned Tst = getOrCreateVReg(*U.getOperand(0));
unsigned Op0 = getOrCreateVReg(*U.getOperand(1));
unsigned Op1 = getOrCreateVReg(*U.getOperand(2));
MIRBuilder.buildSelect(Res, Tst, Op0, Op1);
return true;
}
@@ -984,9 +985,11 @@ bool IRTranslator::translateInsertElement(const User &U,
ValToVReg[&U] = Elt;
return true;
}
MIRBuilder.buildInsertVectorElement(
getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)),
getOrCreateVReg(*U.getOperand(1)), getOrCreateVReg(*U.getOperand(2)));
unsigned Res = getOrCreateVReg(U);
unsigned Val = getOrCreateVReg(*U.getOperand(0));
unsigned Elt = getOrCreateVReg(*U.getOperand(1));
unsigned Idx = getOrCreateVReg(*U.getOperand(2));
MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
return true;
}
@@ -999,9 +1002,10 @@ bool IRTranslator::translateExtractElement(const User &U,
ValToVReg[&U] = Elt;
return true;
}
MIRBuilder.buildExtractVectorElement(getOrCreateVReg(U),
getOrCreateVReg(*U.getOperand(0)),
getOrCreateVReg(*U.getOperand(1)));
unsigned Res = getOrCreateVReg(U);
unsigned Val = getOrCreateVReg(*U.getOperand(0));
unsigned Idx = getOrCreateVReg(*U.getOperand(1));
MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
return true;
}

@@ -68,23 +68,6 @@ bool InstructionSelector::constrainSelectedInstRegOperands(
return true;
}
Optional<int64_t>
InstructionSelector::getConstantVRegVal(unsigned VReg,
const MachineRegisterInfo &MRI) const {
MachineInstr *MI = MRI.getVRegDef(VReg);
if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
return None;
if (MI->getOperand(1).isImm())
return MI->getOperand(1).getImm();
if (MI->getOperand(1).isCImm() &&
MI->getOperand(1).getCImm()->getBitWidth() <= 64)
return MI->getOperand(1).getCImm()->getSExtValue();
return None;
}
bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {

@@ -24,6 +24,8 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <iterator>
#define DEBUG_TYPE "legalizer"
using namespace llvm;
@@ -161,7 +163,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// convergence for performance reasons.
bool Changed = false;
MachineBasicBlock::iterator NextMI;
for (auto &MBB : MF)
for (auto &MBB : MF) {
for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
// Get the next Instruction before we try to legalize, because there's a
// good chance MI will be deleted.
@@ -171,18 +173,21 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// and are assumed to be legal.
if (!isPreISelGenericOpcode(MI->getOpcode()))
continue;
unsigned NumNewInsns = 0;
SmallVector<MachineInstr *, 4> WorkList;
Helper.MIRBuilder.recordInsertions(
[&](MachineInstr *MI) { WorkList.push_back(MI); });
Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
++NumNewInsns;
WorkList.push_back(MI);
});
WorkList.push_back(&*MI);
bool Changed = false;
LegalizerHelper::LegalizeResult Res;
unsigned Idx = 0;
do {
Res = Helper.legalizeInstrStep(*WorkList[Idx]);
// Error out if we couldn't legalize this instruction. We may want to
// fall
// back to DAG ISel instead in the future.
// fall back to DAG ISel instead in the future.
if (Res == LegalizerHelper::UnableToLegalize) {
Helper.MIRBuilder.stopRecordingInsertions();
if (Res == LegalizerHelper::UnableToLegalize) {
@@ -194,10 +199,21 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
}
Changed |= Res == LegalizerHelper::Legalized;
++Idx;
#ifndef NDEBUG
if (NumNewInsns) {
DEBUG(dbgs() << ".. .. Emitted " << NumNewInsns << " insns\n");
for (auto I = WorkList.end() - NumNewInsns, E = WorkList.end();
I != E; ++I)
DEBUG(dbgs() << ".. .. New MI: "; (*I)->print(dbgs()));
NumNewInsns = 0;
}
#endif
} while (Idx < WorkList.size());
Helper.MIRBuilder.stopRecordingInsertions();
}
}
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
@@ -207,7 +223,11 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// good chance MI will be deleted.
NextMI = std::next(MI);
Changed |= combineExtracts(*MI, MRI, TII);
// combineExtracts erases MI.
if (combineExtracts(*MI, MRI, TII)) {
Changed = true;
continue;
}
Changed |= combineMerges(*MI, MRI, TII);
}
}
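
The instrumented loop is a classic worklist algorithm: the MIRBuilder insertion callback appends every instruction created during legalization, and the do/while keeps advancing Idx until no unprocessed entries remain, so the artifacts of one legalization step are themselves legalized. A generic sketch of the pattern (plain C++, not the LLVM API):

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> WorkList = {4}; // pretend "4" is an illegal operation
  for (std::size_t Idx = 0; Idx < WorkList.size(); ++Idx) {
    int Op = WorkList[Idx];
    if (Op > 1) {
      // "Legalize" by splitting in half; record the new work at the end of
      // the worklist, much as recordInsertions does above.
      WorkList.push_back(Op / 2);
      WorkList.push_back(Op - Op / 2);
    } else {
      std::printf("legal op: %d\n", Op);
    }
  }
  return 0;
}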

@@ -24,7 +24,7 @@
#include <sstream>
#define DEBUG_TYPE "legalize-mir"
#define DEBUG_TYPE "legalizer"
using namespace llvm;
@@ -35,24 +35,34 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF)
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
auto Action = LI.getAction(MI, MRI);
switch (std::get<0>(Action)) {
case LegalizerInfo::Legal:
DEBUG(dbgs() << ".. Already legal\n");
return AlreadyLegal;
case LegalizerInfo::Libcall:
DEBUG(dbgs() << ".. Convert to libcall\n");
return libcall(MI);
case LegalizerInfo::NarrowScalar:
DEBUG(dbgs() << ".. Narrow scalar\n");
return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action));
case LegalizerInfo::WidenScalar:
DEBUG(dbgs() << ".. Widen scalar\n");
return widenScalar(MI, std::get<1>(Action), std::get<2>(Action));
case LegalizerInfo::Lower:
DEBUG(dbgs() << ".. Lower\n");
return lower(MI, std::get<1>(Action), std::get<2>(Action));
case LegalizerInfo::FewerElements:
DEBUG(dbgs() << ".. Reduce number of elements\n");
return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action));
case LegalizerInfo::Custom:
DEBUG(dbgs() << ".. Custom legalization\n");
return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized
: UnableToLegalize;
default:
DEBUG(dbgs() << ".. Unable to legalize\n");
return UnableToLegalize;
}
}
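
Aside from the new DEBUG lines, legalizeInstrStep is a single dispatch from the LegalizerInfo action to a handling strategy, with a default that reports UnableToLegalize instead of asserting. A toy dispatch with the same shape (plain C++, names ours):

#include <cstdio>

enum Action { Legal, Libcall, NarrowScalar, WidenScalar, Lower,
              FewerElements, Custom, Unsupported };

static const char *strategyFor(Action A) {
  switch (A) {
  case Legal:         return "already legal";
  case Libcall:       return "convert to libcall";
  case NarrowScalar:  return "narrow scalar";
  case WidenScalar:   return "widen scalar";
  case Lower:         return "lower";
  case FewerElements: return "reduce number of elements";
  case Custom:        return "custom legalization";
  default:            return "unable to legalize"; // fail gracefully
  }
}

int main() {
  std::printf("%s\n", strategyFor(WidenScalar));
  return 0;
}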

@@ -592,7 +592,7 @@ MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(unsigned Res,
LLT EltTy = MRI->getType(Elt);
LLT IdxTy = MRI->getType(Idx);
assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type");
assert(EltTy.isScalar() && IdxTy.isScalar() && "invalid operand type");
assert(IdxTy.isScalar() && "invalid operand type");
assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch");
assert(ResTy.getElementType() == EltTy && "type mismatch");
#endif
@@ -612,7 +612,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res,
LLT ValTy = MRI->getType(Val);
LLT IdxTy = MRI->getType(Idx);
assert(ValTy.isVector() && "invalid operand type");
assert(ResTy.isScalar() && IdxTy.isScalar() && "invalid operand type");
assert((ResTy.isScalar() || ResTy.isPointer()) && "invalid operand type");
assert(IdxTy.isScalar() && "invalid operand type");
assert(ValTy.getElementType() == ResTy && "type mismatch");
#endif

@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -93,3 +94,19 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
R << Msg << ": " << ore::MNV("Inst", MI);
reportGISelFailure(MF, TPC, MORE, R);
}
Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg,
const MachineRegisterInfo &MRI) {
MachineInstr *MI = MRI.getVRegDef(VReg);
if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
return None;
if (MI->getOperand(1).isImm())
return MI->getOperand(1).getImm();
if (MI->getOperand(1).isCImm() &&
MI->getOperand(1).getCImm()->getBitWidth() <= 64)
return MI->getOperand(1).getCImm()->getSExtValue();
return None;
}
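
A minimal usage sketch for the now-shared helper (assuming the matching declaration this change adds to GlobalISel's Utils.h): any pass can fold against a known G_CONSTANT without going through InstructionSelector.

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <cstdint>

// Our wrapper, not from the diff: true when VReg is defined by a
// G_CONSTANT whose value equals Val.
static bool isConstantVReg(unsigned VReg, int64_t Val,
                           const llvm::MachineRegisterInfo &MRI) {
  if (auto VRegVal = llvm::getConstantVRegVal(VReg, MRI))
    return *VRegVal == Val;
  return false; // not a G_CONSTANT, or a constant wider than 64 bits
}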

@@ -888,20 +888,10 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Debug values are not allowed to affect codegen.
if (MI->isDebugValue()) {
// Modify DBG_VALUE now that the value is in a spill slot.
bool IsIndirect = MI->isIndirectDebugValue();
uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
const MDNode *Var = MI->getDebugVariable();
const MDNode *Expr = MI->getDebugExpression();
DebugLoc DL = MI->getDebugLoc();
DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
MachineBasicBlock *MBB = MI->getParent();
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE))
.addFrameIndex(StackSlot)
.addImm(Offset)
.addMetadata(Var)
.addMetadata(Expr);
DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI);
buildDbgValueForSpill(*MBB, MI, *MI, StackSlot);
MBB->erase(MI);
continue;
}

@@ -21,10 +21,10 @@ using namespace llvm;
LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
if (auto VTy = dyn_cast<VectorType>(&Ty)) {
auto NumElements = VTy->getNumElements();
auto ScalarSizeInBits = VTy->getElementType()->getPrimitiveSizeInBits();
LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL);
if (NumElements == 1)
return LLT::scalar(ScalarSizeInBits);
return LLT::vector(NumElements, ScalarSizeInBits);
return ScalarTy;
return LLT::vector(NumElements, ScalarTy);
} else if (auto PTy = dyn_cast<PointerType>(&Ty)) {
return LLT::pointer(PTy->getAddressSpace(), DL.getTypeSizeInBits(&Ty));
} else if (Ty.isSized()) {
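
Recursing on the element type lets vectors of pointers become vectors of pointer-typed LLTs instead of vectors of raw bit widths. A sketch of the effect (assuming this revision's headers and a 64-bit pointer layout):

#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

int main() {
  llvm::LLVMContext Ctx;
  llvm::DataLayout DL("p:64:64"); // assumption: 64-bit pointers
  llvm::Type *VecOfPtrs =
      llvm::VectorType::get(llvm::Type::getInt8PtrTy(Ctx), 4);
  // With the recursion above this yields <4 x p0>, i.e.
  // LLT::vector(4, LLT::pointer(0, 64)), not a vector of plain s64.
  llvm::LLT Ty = llvm::getLLTForType(*VecOfPtrs, DL);
  (void)Ty;
  return 0;
}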

@@ -2351,3 +2351,31 @@ MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
BB.insert(I, MI);
return MachineInstrBuilder(MF, MI);
}
MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
const MachineInstr &Orig,
int FrameIndex) {
const MDNode *Var = Orig.getDebugVariable();
auto *Expr = cast_or_null<DIExpression>(Orig.getDebugExpression());
bool IsIndirect = Orig.isIndirectDebugValue();
uint64_t Offset = IsIndirect ? Orig.getOperand(1).getImm() : 0;
DebugLoc DL = Orig.getDebugLoc();
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
// If the DBG_VALUE already was a memory location, add an extra
// DW_OP_deref. Otherwise just turning this from a register into a
// memory/indirect location is sufficient.
if (IsIndirect) {
SmallVector<uint64_t, 8> Ops;
Ops.push_back(dwarf::DW_OP_deref);
if (Expr)
Ops.append(Expr->elements_begin(), Expr->elements_end());
Expr = DIExpression::get(Expr->getContext(), Ops);
}
return BuildMI(BB, I, DL, Orig.getDesc())
.addFrameIndex(FrameIndex)
.addImm(Offset)
.addMetadata(Var)
.addMetadata(Expr);
}
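
The subtle case handled above is a DBG_VALUE that was already indirect: once the value lives in a spill slot there is one more level of indirection, so DW_OP_deref is prepended to whatever expression was present. A self-contained sketch of just that metadata rewrite (hypothetical expression contents):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Dwarf.h"
#include <cstdint>

int main() {
  llvm::LLVMContext Ctx;
  uint64_t Elts[] = {llvm::dwarf::DW_OP_plus, 8};
  llvm::DIExpression *Expr = llvm::DIExpression::get(Ctx, Elts);
  // Mirror of the logic above: prepend one DW_OP_deref for the spill.
  llvm::SmallVector<uint64_t, 8> Ops;
  Ops.push_back(llvm::dwarf::DW_OP_deref);
  Ops.append(Expr->elements_begin(), Expr->elements_end());
  Expr = llvm::DIExpression::get(Ctx, Ops);
  // Expr is now !DIExpression(DW_OP_deref, DW_OP_plus, 8).
  return 0;
}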
