diff --git a/CMakeLists.txt b/CMakeLists.txt index 96e06082fde6..9d0180b86ff8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -217,7 +217,9 @@ if( MSVC ) # List of valid CRTs for MSVC set(MSVC_CRT MD - MDd) + MDd + MT + MTd) set(LLVM_USE_CRT "" CACHE STRING "Specify VC++ CRT to use for debug/release configurations.") add_llvm_definitions( -D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS ) diff --git a/Makefile.rules b/Makefile.rules index 20e642a4505b..7f298a995422 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -578,8 +578,6 @@ endif ifeq ($(TARGET_OS),Darwin) ifneq ($(ARCH),ARM) TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION) - else - TargetCommonOpts += -marm endif endif diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 22f15b7feb01..56d716bb8da7 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -31,12 +31,12 @@ dnl=== dnl===-----------------------------------------------------------------------=== dnl Initialize autoconf and define the package name, version number and dnl email address for reporting bugs. -AC_INIT([[llvm]],[[2.7svn]],[llvmbugs@cs.uiuc.edu]) +AC_INIT([[llvm]],[[2.8svn]],[llvmbugs@cs.uiuc.edu]) dnl Provide a copyright substitution and ensure the copyright notice is included dnl in the output of --version option of the generated configure script. -AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign."]) -AC_COPYRIGHT([Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign.]) +AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign."]) +AC_COPYRIGHT([Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.]) dnl Indicate that we require autoconf 2.59 or later. Ths is needed because we dnl use some autoconf macros only available in 2.59. @@ -62,6 +62,41 @@ dnl Configure all of the projects present in our source tree. While we could dnl just AC_CONFIG_SUBDIRS on the set of directories in projects that have a dnl configure script, that usage of the AC_CONFIG_SUBDIRS macro is deprecated. dnl Instead we match on the known projects. + +dnl +dnl One tricky part of doing this is that some projects depend upon other +dnl projects. For example, several projects rely upon the LLVM test suite. +dnl We want to configure those projects first so that their object trees are +dnl created before running the configure scripts of projects that depend upon +dnl them. +dnl + +dnl Several projects use llvm-gcc, so configure that first +if test -d ${srcdir}/projects/llvm-gcc ; then + AC_CONFIG_SUBDIRS([projects/llvm-gcc]) +fi + +dnl Several projects use the LLVM test suite, so configure it next. 
+if test -d ${srcdir}/projects/test-suite ; then + AC_CONFIG_SUBDIRS([projects/test-suite]) +fi + +dnl llvm-test is the old name of the test-suite, kept here for backwards +dnl compatibility +if test -d ${srcdir}/projects/llvm-test ; then + AC_CONFIG_SUBDIRS([projects/llvm-test]) +fi + +dnl Some projects use poolalloc; configure that next +if test -d ${srcdir}/projects/poolalloc ; then + AC_CONFIG_SUBDIRS([projects/poolalloc]) +fi + +if test -d ${srcdir}/projects/llvm-poolalloc ; then + AC_CONFIG_SUBDIRS([projects/llvm-poolalloc]) +fi + +dnl Check for all other projects for i in `ls ${srcdir}/projects` do if test -d ${srcdir}/projects/${i} ; then @@ -70,16 +105,9 @@ do sample) AC_CONFIG_SUBDIRS([projects/sample]) ;; privbracket) AC_CONFIG_SUBDIRS([projects/privbracket]) ;; llvm-stacker) AC_CONFIG_SUBDIRS([projects/llvm-stacker]) ;; - # llvm-test is the old name of the test-suite, kept here for backwards - # compatibility - llvm-test) AC_CONFIG_SUBDIRS([projects/llvm-test]) ;; - test-suite) AC_CONFIG_SUBDIRS([projects/test-suite]) ;; llvm-reopt) AC_CONFIG_SUBDIRS([projects/llvm-reopt]);; - llvm-gcc) AC_CONFIG_SUBDIRS([projects/llvm-gcc]) ;; llvm-java) AC_CONFIG_SUBDIRS([projects/llvm-java]) ;; llvm-tv) AC_CONFIG_SUBDIRS([projects/llvm-tv]) ;; - llvm-poolalloc) AC_CONFIG_SUBDIRS([projects/llvm-poolalloc]) ;; - poolalloc) AC_CONFIG_SUBDIRS([projects/poolalloc]) ;; safecode) AC_CONFIG_SUBDIRS([projects/safecode]) ;; llvm-kernel) AC_CONFIG_SUBDIRS([projects/llvm-kernel]) ;; *) diff --git a/configure b/configure index 175297e543cd..a2aad3ea3ced 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.60 for llvm 2.7svn. +# Generated by GNU Autoconf 2.60 for llvm 2.8svn. # # Report bugs to . # @@ -9,7 +9,7 @@ # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. # -# Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign. +# Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. ## --------------------- ## ## M4sh Initialization. ## ## --------------------- ## @@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='llvm' PACKAGE_TARNAME='-llvm-' -PACKAGE_VERSION='2.7svn' -PACKAGE_STRING='llvm 2.7svn' +PACKAGE_VERSION='2.8svn' +PACKAGE_STRING='llvm 2.8svn' PACKAGE_BUGREPORT='llvmbugs@cs.uiuc.edu' ac_unique_file="lib/VMCore/Module.cpp" @@ -802,17 +802,17 @@ CPP CXX CXXFLAGS CCC' -ac_subdirs_all='projects/sample +ac_subdirs_all='projects/llvm-gcc +projects/test-suite +projects/llvm-test +projects/poolalloc +projects/llvm-poolalloc +projects/sample projects/privbracket projects/llvm-stacker -projects/llvm-test -projects/test-suite projects/llvm-reopt -projects/llvm-gcc projects/llvm-java projects/llvm-tv -projects/llvm-poolalloc -projects/poolalloc projects/safecode projects/llvm-kernel' @@ -1316,7 +1316,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures llvm 2.7svn to adapt to many kinds of systems. +\`configure' configures llvm 2.8svn to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1382,7 +1382,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of llvm 2.7svn:";; + short | recursive ) echo "Configuration of llvm 2.8svn:";; esac cat <<\_ACEOF @@ -1533,7 +1533,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -llvm configure 2.7svn +llvm configure 2.8svn generated by GNU Autoconf 2.60 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -1541,7 +1541,7 @@ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. -Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign. +Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. _ACEOF exit fi @@ -1549,7 +1549,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by llvm $as_me 2.7svn, which was +It was created by llvm $as_me 2.8svn, which was generated by GNU Autoconf 2.60. Invocation command line was $ $0 $@ @@ -1903,7 +1903,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu -LLVM_COPYRIGHT="Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign." +LLVM_COPYRIGHT="Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign." @@ -1951,6 +1951,33 @@ echo "$as_me: error: Already configured in ${srcdir}" >&2;} fi fi + + +if test -d ${srcdir}/projects/llvm-gcc ; then + subdirs="$subdirs projects/llvm-gcc" + +fi + +if test -d ${srcdir}/projects/test-suite ; then + subdirs="$subdirs projects/test-suite" + +fi + +if test -d ${srcdir}/projects/llvm-test ; then + subdirs="$subdirs projects/llvm-test" + +fi + +if test -d ${srcdir}/projects/poolalloc ; then + subdirs="$subdirs projects/poolalloc" + +fi + +if test -d ${srcdir}/projects/llvm-poolalloc ; then + subdirs="$subdirs projects/llvm-poolalloc" + +fi + for i in `ls ${srcdir}/projects` do if test -d ${srcdir}/projects/${i} ; then @@ -1961,25 +1988,13 @@ do privbracket) subdirs="$subdirs projects/privbracket" ;; llvm-stacker) subdirs="$subdirs projects/llvm-stacker" - ;; - # llvm-test is the old name of the test-suite, kept here for backwards - # compatibility - llvm-test) subdirs="$subdirs projects/llvm-test" - ;; - test-suite) subdirs="$subdirs projects/test-suite" ;; llvm-reopt) subdirs="$subdirs projects/llvm-reopt" ;; - llvm-gcc) subdirs="$subdirs projects/llvm-gcc" - ;; llvm-java) subdirs="$subdirs projects/llvm-java" ;; llvm-tv) subdirs="$subdirs projects/llvm-tv" ;; - llvm-poolalloc) subdirs="$subdirs projects/llvm-poolalloc" - ;; - poolalloc) subdirs="$subdirs projects/poolalloc" - ;; safecode) subdirs="$subdirs projects/safecode" ;; llvm-kernel) subdirs="$subdirs projects/llvm-kernel" @@ -11136,7 +11151,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <&1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by llvm $as_me 2.7svn, which was +This file was extended by llvm $as_me 2.8svn, which was generated by GNU Autoconf 2.60. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -20661,7 +20676,7 @@ Report bugs to ." 
_ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -llvm config.status 2.7svn +llvm config.status 2.8svn configured by $0, generated by GNU Autoconf 2.60, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html index 5bdbceb51da9..53a018aefbc7 100644 --- a/docs/ReleaseNotes.html +++ b/docs/ReleaseNotes.html @@ -10,6 +10,9 @@
LLVM 2.7 Release Notes
+ +
  1. Introduction
  2. Sub-project Status Update
  3. @@ -48,14 +51,18 @@ href="http://llvm.org/releases/">LLVM releases web site.

    For more information about LLVM, including information about the latest release, please check out the main LLVM web site. If you have questions or comments, the LLVM Developer's Mailing -List is a good place to send them.

    +href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVM Developer's +Mailing List is a good place to send them.

    Note that if you are reading this file from a Subversion checkout or the main LLVM web page, this document applies to the next release, not the current one. To see the release notes for a specific release, please see the releases page.

    + +

    FIXME: llvm.org moved to new server, mention new logo, Ted and Doug new code + owners.

    + @@ -66,6 +73,7 @@ Almost dead code. llvm/Analysis/PointerTracking.h => Edwin wants this, consider for 2.8. ABCD, SCCVN, GEPSplitterPass MSIL backend? + lib/Transforms/Utils/SSI.cpp -> ABCD depends on it. --> @@ -78,8 +86,6 @@ Almost dead code. loop dependence analysis ELF Writer? How stable?
  4. PostRA scheduler improvements, ARM adoption (David Goodwin).
  5. - 2.7 supports the GDB 7.0 jit interfaces for debug info. - 2.7 eliminates ADT/iterator.h --> @@ -123,6 +128,7 @@ development. Here we include updates on these subprojects.
    • ...
    • +include a link to cxx_compatibility.html
    @@ -139,7 +145,7 @@ href="http://clang.llvm.org/StaticAnalysis.html">automatically finding bugs in C and Objective-C programs. The tool performs checks to find bugs that occur on a specific path within a program.

    -

    In the LLVM 2.7 time-frame, the analyzer core has ...

    +

    In the LLVM 2.7 time-frame, the analyzer core has sprouted legs and...

    @@ -190,24 +196,6 @@ License, a "BSD-style" license.

    - - - -
    -

    -The new LLVM KLEE project is a symbolic -execution framework for programs in LLVM bitcode form. KLEE tries to -symbolically evaluate "all" paths through the application and records state -transitions that lead to fault states. This allows it to construct testcases -that lead to faults and can even be used to verify algorithms. For more -details, please see the OSDI 2008 paper about -KLEE.

    - -
    -
    DragonEgg: GCC-4.5 as an LLVM frontend @@ -257,6 +245,8 @@ The LLVM Machine Code (MC) Toolkit project is ...
    +Need update. +

    @@ -278,6 +269,8 @@ a counter based JIT, type feedback and speculative method inlining.

    +Need update. +

    @@ -308,9 +301,9 @@ built-in list and matrix support (including list and matrix comprehensions) and an easy-to-use C interface. The interpreter uses LLVM as a backend to JIT-compile Pure programs to fast native code.

    -

    Pure versions ??? and later have been tested and are known to work with -LLVM 2.7 (and continue to work with older LLVM releases >= 2.3 as well). -

    +

    Pure versions 0.43 and later have been tested and are known to work with +LLVM 2.7 (and continue to work with older LLVM releases >= 2.5).

    + @@ -321,6 +314,8 @@ LLVM 2.7 (and continue to work with older LLVM releases >= 2.3 as well).

    +Need update. +

    @@ -342,7 +337,8 @@ fully featured as the original DMD compiler from DigitalMars. Roadsend PHP (rphp) is an open source implementation of the PHP programming language that uses LLVM for its optimizer, JIT and static compiler. This is a -reimplementation of an earlier project that is now based on LLVM.

    +reimplementation of an earlier project that is now based on LLVM. +

    @@ -355,7 +351,8 @@ reimplementation of an earlier project that is now based on LLVM.

    Unladen Swallow is a branch of Python intended to be fully compatible and significantly faster. It uses LLVM's optimization passes and JIT -compiler.

    +compiler. +

    @@ -365,10 +362,13 @@ compiler.

    +Need update. + +

    @@ -378,13 +378,15 @@ code.

    +Need update. +

    @@ -418,6 +420,54 @@ in this section.
  6. ...
  7. +Extensible metadata solid. + +Debug info improvements: using metadata instead of llvm.dbg global variables. +This brings several enhancements including improved compile times. + +New instruction selector. +GHC Haskell ABI / calling conv support. +Pre-Alpha support for unions in IR. +New InlineHint and StackAlignment function attributes +Code generator MC'ized except for debug info and EH. +New SCEV AA pass: -scev-aa +Inliner reuses array allocas when inlining multiple callers to reduce stack usage. +MC encoding and disassembler APIs. +Optimal Edge Profiling? +Instcombine is now a library and has its own IRBuilder to simplify itself. +New llvm/Support/Regex.h API. FileCheck now does regexes +Many subtle pointer invalidation bugs in Callgraph have been fixed and it now uses asserting value handles. +MC Disassembler (with blog post), MCInstPrinter. Many X86 backend and AsmPrinter simplifications +Various tools like llc and opt now read either .ll or .bc files as input. +Malloc and free instructions got removed. +compiler-rt support for ARM. +complete llvm-gcc NEON support. +Can transcode from GAS to Intel syntax with "llvm-mc foo.s -output-asm-variant=1" +JIT debug information with GDB 7.0 +New CodeGen Level CSE +CMake can now run tests, what other improvements? +ARM/Thumb using reg scavenging for stack object address materialization (PEI). +New SSAUpdater and MachineSSAUpdater classes for unstructured SSA updating, + changed jump threading, GVN, etc. to use it which simplified them and sped + them up. +Combiner-AA improvements, why not on by default? +Pre-regalloc tail duplication +x86 sibcall optimization +New LSR with full strength reduction mode +The most awesome sext / zext optimization pass. ? + + + +CondProp pass removed (functionality merged into jump threading). +AndersAA got removed (from 2.7 or mainline?) +PredSimplify, LoopVR, GVNPRE got removed. +LLVM command line tools now overwrite their output; before, they would only do this with -f. +DOUT removed, use DEBUG(errs()) instead (see the sketch below). +Much stuff converted to use raw_ostream instead of std::ostream. +TargetAsmInfo renamed to MCAsmInfo +llvm/ADT/iterator.h gone. + + @@ -699,13 +749,7 @@ listed by component. If you run into a problem, please check the LLVM bug database and submit a bug if there isn't already one.
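For porting purposes, the DOUT removal noted in the list above is mechanical; a minimal sketch, with the pass name and message invented purely for illustration:

    #define DEBUG_TYPE "mypass"             // name used by -debug-only=mypass
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void reportVisited(unsigned NumBlocks) {
      // Before 2.7: DOUT << "visited " << NumBlocks << " blocks\n";
      DEBUG(errs() << "visited " << NumBlocks << " blocks\n");
    }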

    -
      -
    • The llvm-gcc bootstrap will fail with some versions of binutils (e.g. 2.15) - with a message of "Error: can not do 8 - byte pc-relative relocation" when building C++ code. We intend to - fix this on mainline, but a workaround is to upgrade to binutils 2.17 or - later.
    • - +
      • LLVM will not correctly compile on Solaris and/or OpenSolaris using the stock GCC 3.x.x series 'out of the box'; see: Broken versions of GCC and other tools. @@ -731,10 +775,11 @@ components, please contact us on the LLVMdev list.

          -
        • The MSIL, Alpha, SPU, MIPS, PIC16, Blackfin, MSP430 and SystemZ backends are - experimental.
        • +
        • The MSIL, Alpha, SPU, MIPS, PIC16, Blackfin, MSP430, SystemZ and MicroBlaze + backends are experimental.
        • The llc "-filetype=asm" (the default) is the only - supported value for this option. The ELF writer is experimental.
        • + supported value for this option. The MachO writer is experimental, and + works much better in mainline SVN.
        @@ -865,7 +910,7 @@ appropriate nops inserted to ensure restartability.
      • @@ -881,24 +926,6 @@ appropriate nops inserted to ensure restartability.
        - - - -
        - -

        The C++ front-end is considered to be fully -tested and works for a number of non-trivial programs, including LLVM -itself, Qt, Mozilla, etc.

        - -
          -
        • Exception handling works well on the X86 and PowerPC targets. Currently - only Linux and Darwin targets are supported (both 32 and 64 bit).
        • -
        - -
        -
        Known problems with the llvm-gcc Fortran front-end @@ -997,7 +1024,7 @@ lists.

        src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> LLVM Compiler Infrastructure
        - Last modified: $Date: 2010-03-01 20:29:17 +0100 (Mon, 01 Mar 2010) $ + Last modified: $Date: 2010-03-19 04:18:05 +0100 (Fri, 19 Mar 2010) $ diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html index 9af8fabfefd3..7fffece2e0e4 100644 --- a/docs/SourceLevelDebugging.html +++ b/docs/SourceLevelDebugging.html @@ -237,7 +237,7 @@ height="369">

        LLVM debugging information has been carefully designed to make it possible for the optimizer to optimize the program and debugging information without necessarily having to know anything about debugging information. In - particular, te use of metadadta avoids duplicated dubgging information from + particular, the use of metadata avoids duplicated debugging information from the beginning, and the global dead code elimination pass automatically deletes debugging information for a function if it decides to delete the function.

        @@ -370,7 +370,7 @@ height="369">
        -

        These descriptors contain informations for a file. Global variables and top +

        These descriptors contain information for a file. Global variables and top level functions would be defined using this context. File descriptors also provide context for source line correspondence.

        @@ -967,7 +967,7 @@ call void @llvm.dbg.declare({ }* %2, metadata !12), !dbg !14 -

        Here !14 indicates that Z is declaread at line number 5 and +

        Here !14 indicates that Z is declared at line number 5 and column number 9 inside of lexical scope !13. The lexical scope itself resides inside of lexical scope !1 described above.

        @@ -1762,7 +1762,7 @@ enum Trees { Chris Lattner
        LLVM Compiler Infrastructure
        - Last modified: $Date: 2010-03-09 01:44:10 +0100 (Tue, 09 Mar 2010) $ + Last modified: $Date: 2010-03-17 16:01:50 +0100 (Wed, 17 Mar 2010) $ diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h index b9f2d8332263..3a86b0d34368 100644 --- a/include/llvm/ADT/BitVector.h +++ b/include/llvm/ADT/BitVector.h @@ -329,7 +329,8 @@ public: Size = RHS.size(); unsigned RHSWords = NumBitWords(Size); if (Size <= Capacity * BITWORD_SIZE) { - std::copy(RHS.Bits, &RHS.Bits[RHSWords], Bits); + if (Size) + std::copy(RHS.Bits, &RHS.Bits[RHSWords], Bits); clear_unused_bits(); return *this; } diff --git a/include/llvm/ADT/EquivalenceClasses.h b/include/llvm/ADT/EquivalenceClasses.h index f5f3d496f851..5f89823e8ba1 100644 --- a/include/llvm/ADT/EquivalenceClasses.h +++ b/include/llvm/ADT/EquivalenceClasses.h @@ -16,6 +16,7 @@ #define LLVM_ADT_EQUIVALENCECLASSES_H #include "llvm/System/DataTypes.h" +#include #include namespace llvm { diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h index 81dc46929034..e8979bb076de 100644 --- a/include/llvm/ADT/FoldingSet.h +++ b/include/llvm/ADT/FoldingSet.h @@ -23,6 +23,7 @@ namespace llvm { class APFloat; class APInt; + class BumpPtrAllocator; /// This folding set used for two purposes: /// 1. Given information about a node we want to create, look up the unique @@ -197,6 +198,23 @@ template struct FoldingSetTrait { static inline void Profile(T& X, FoldingSetNodeID& ID) { X.Profile(ID); } }; +//===--------------------------------------------------------------------===// +/// FoldingSetNodeIDRef - This class describes a reference to an interned +/// FoldingSetNodeID, which can be a useful to store node id data rather +/// than using plain FoldingSetNodeIDs, since the 32-element SmallVector +/// is often much larger than necessary, and the possibility of heap +/// allocation means it requires a non-trivial destructor call. +class FoldingSetNodeIDRef { + unsigned* Data; + size_t Size; +public: + FoldingSetNodeIDRef() : Data(0), Size(0) {} + FoldingSetNodeIDRef(unsigned *D, size_t S) : Data(D), Size(S) {} + + unsigned *getData() const { return Data; } + size_t getSize() const { return Size; } +}; + //===--------------------------------------------------------------------===// /// FoldingSetNodeID - This class is used to gather all the unique data bits of /// a node. When all the bits are gathered this class is used to produce a @@ -210,11 +228,8 @@ class FoldingSetNodeID { public: FoldingSetNodeID() {} - /// getRawData - Return the ith entry in the Bits data. - /// - unsigned getRawData(unsigned i) const { - return Bits[i]; - } + FoldingSetNodeID(FoldingSetNodeIDRef Ref) + : Bits(Ref.getData(), Ref.getData() + Ref.getSize()) {} /// Add* - Add various data types to Bit data. /// @@ -242,6 +257,11 @@ public: /// operator== - Used to compare two nodes to each other. /// bool operator==(const FoldingSetNodeID &RHS) const; + + /// Intern - Copy this node's data to a memory region allocated from the + /// given allocator and return a FoldingSetNodeIDRef describing the + /// interned data. + FoldingSetNodeIDRef Intern(BumpPtrAllocator &Allocator) const; }; // Convenience type to hide the implementation of the folding set. diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h index 89acefd06186..c2afb7e681d1 100644 --- a/include/llvm/ADT/SmallVector.h +++ b/include/llvm/ADT/SmallVector.h @@ -57,17 +57,18 @@ protected: // something else. An array of char would work great, but might not be // aligned sufficiently. 
Instead, we either use GCC extensions, or some // number of union instances for the space, which guarantee maximal alignment. + struct U { #ifdef __GNUC__ - typedef char U; - U FirstEl __attribute__((aligned)); + char X __attribute__((aligned)); #else - union U { - double D; - long double LD; - long long L; - void *P; - } FirstEl; + union { + double D; + long double LD; + long long L; + void *P; + } X; #endif + } FirstEl; // Space after 'FirstEl' is clobbered, do not add any instance vars after it. protected: diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 96d29ba05707..ab13a9dfa4e5 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -49,7 +49,11 @@ namespace llvm { /// are opaque objects that the client is not allowed to do much with /// directly. /// - class SCEV : public FastFoldingSetNode { + class SCEV : public FoldingSetNode { + /// FastID - A reference to an Interned FoldingSetNodeID for this node. + /// The ScalarEvolution's BumpPtrAllocator holds the data. + FoldingSetNodeIDRef FastID; + // The SCEV baseclass this node corresponds to const unsigned short SCEVType; @@ -64,11 +68,14 @@ namespace llvm { protected: virtual ~SCEV(); public: - explicit SCEV(const FoldingSetNodeID &ID, unsigned SCEVTy) : - FastFoldingSetNode(ID), SCEVType(SCEVTy), SubclassData(0) {} + explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy) : + FastID(ID), SCEVType(SCEVTy), SubclassData(0) {} unsigned getSCEVType() const { return SCEVType; } + /// Profile - FoldingSet support. + void Profile(FoldingSetNodeID& ID) { ID = FastID; } + /// isLoopInvariant - Return true if the value of this SCEV is unchanging in /// the specified loop. virtual bool isLoopInvariant(const Loop *L) const = 0; diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index 26dc0c4a54a6..dc9b73bd566d 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -79,12 +79,7 @@ namespace llvm { /// expandCodeFor - Insert code to directly compute the specified SCEV /// expression into the program. The inserted code is inserted into the /// specified block. - Value *expandCodeFor(const SCEV *SH, const Type *Ty, Instruction *I) { - BasicBlock::iterator IP = I; - while (isInsertedInstruction(IP)) ++IP; - Builder.SetInsertPoint(IP->getParent(), IP); - return expandCodeFor(SH, Ty); - } + Value *expandCodeFor(const SCEV *SH, const Type *Ty, Instruction *I); /// setIVIncInsertPos - Set the current IV increment loop and position. void setIVIncInsertPos(const Loop *L, Instruction *Pos) { @@ -109,6 +104,13 @@ namespace llvm { /// is useful for late optimization passes. void disableCanonicalMode() { CanonicalMode = false; } + /// clearInsertPoint - Clear the current insertion point. This is useful + /// if the instruction that had been serving as the insertion point may + /// have been deleted. 
+ void clearInsertPoint() { + Builder.ClearInsertionPoint(); + } + private: LLVMContext &getContext() const { return SE.getContext(); } diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index 0ab3b3f4a5e2..74242031eddd 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -37,7 +37,7 @@ namespace llvm { friend class ScalarEvolution; ConstantInt *V; - SCEVConstant(const FoldingSetNodeID &ID, ConstantInt *v) : + SCEVConstant(const FoldingSetNodeIDRef ID, ConstantInt *v) : SCEV(ID, scConstant), V(v) {} public: ConstantInt *getValue() const { return V; } @@ -81,7 +81,7 @@ namespace llvm { const SCEV *Op; const Type *Ty; - SCEVCastExpr(const FoldingSetNodeID &ID, + SCEVCastExpr(const FoldingSetNodeIDRef ID, unsigned SCEVTy, const SCEV *op, const Type *ty); public: @@ -120,7 +120,7 @@ namespace llvm { class SCEVTruncateExpr : public SCEVCastExpr { friend class ScalarEvolution; - SCEVTruncateExpr(const FoldingSetNodeID &ID, + SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op, const Type *ty); public: @@ -140,7 +140,7 @@ namespace llvm { class SCEVZeroExtendExpr : public SCEVCastExpr { friend class ScalarEvolution; - SCEVZeroExtendExpr(const FoldingSetNodeID &ID, + SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, const Type *ty); public: @@ -160,7 +160,7 @@ namespace llvm { class SCEVSignExtendExpr : public SCEVCastExpr { friend class ScalarEvolution; - SCEVSignExtendExpr(const FoldingSetNodeID &ID, + SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, const Type *ty); public: @@ -180,25 +180,27 @@ namespace llvm { /// class SCEVNAryExpr : public SCEV { protected: - SmallVector Operands; + // Since SCEVs are immutable, ScalarEvolution allocates operand + // arrays with its SCEVAllocator, so this class just needs a simple + // pointer rather than a more elaborate vector-like data structure. + // This also avoids the need for a non-trivial destructor. 
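  // A hedged usage sketch (the variable AR below is assumed, e.g. a
  // const SCEVAddRecExpr* obtained from ScalarEvolution): client iteration
  // over the operands is unchanged by the new pointer/length representation.
  //
  //   for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
  //        I != E; ++I)
  //     (*I)->dump();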
+ const SCEV *const *Operands; + size_t NumOperands; - SCEVNAryExpr(const FoldingSetNodeID &ID, - enum SCEVTypes T, const SmallVectorImpl &ops) - : SCEV(ID, T), Operands(ops.begin(), ops.end()) {} + SCEVNAryExpr(const FoldingSetNodeIDRef ID, + enum SCEVTypes T, const SCEV *const *O, size_t N) + : SCEV(ID, T), Operands(O), NumOperands(N) {} public: - unsigned getNumOperands() const { return (unsigned)Operands.size(); } + size_t getNumOperands() const { return NumOperands; } const SCEV *getOperand(unsigned i) const { - assert(i < Operands.size() && "Operand index out of range!"); + assert(i < NumOperands && "Operand index out of range!"); return Operands[i]; } - const SmallVectorImpl &getOperands() const { - return Operands; - } - typedef SmallVectorImpl::const_iterator op_iterator; - op_iterator op_begin() const { return Operands.begin(); } - op_iterator op_end() const { return Operands.end(); } + typedef const SCEV *const *op_iterator; + op_iterator op_begin() const { return Operands; } + op_iterator op_end() const { return Operands + NumOperands; } virtual bool isLoopInvariant(const Loop *L) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) @@ -260,10 +262,9 @@ namespace llvm { /// class SCEVCommutativeExpr : public SCEVNAryExpr { protected: - SCEVCommutativeExpr(const FoldingSetNodeID &ID, - enum SCEVTypes T, - const SmallVectorImpl &ops) - : SCEVNAryExpr(ID, T, ops) {} + SCEVCommutativeExpr(const FoldingSetNodeIDRef ID, + enum SCEVTypes T, const SCEV *const *O, size_t N) + : SCEVNAryExpr(ID, T, O, N) {} public: virtual const char *getOperationStr() const = 0; @@ -287,9 +288,9 @@ namespace llvm { class SCEVAddExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; - SCEVAddExpr(const FoldingSetNodeID &ID, - const SmallVectorImpl &ops) - : SCEVCommutativeExpr(ID, scAddExpr, ops) { + SCEVAddExpr(const FoldingSetNodeIDRef ID, + const SCEV *const *O, size_t N) + : SCEVCommutativeExpr(ID, scAddExpr, O, N) { } public: @@ -315,9 +316,9 @@ namespace llvm { class SCEVMulExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; - SCEVMulExpr(const FoldingSetNodeID &ID, - const SmallVectorImpl &ops) - : SCEVCommutativeExpr(ID, scMulExpr, ops) { + SCEVMulExpr(const FoldingSetNodeIDRef ID, + const SCEV *const *O, size_t N) + : SCEVCommutativeExpr(ID, scMulExpr, O, N) { } public: @@ -339,7 +340,7 @@ namespace llvm { const SCEV *LHS; const SCEV *RHS; - SCEVUDivExpr(const FoldingSetNodeID &ID, const SCEV *lhs, const SCEV *rhs) + SCEVUDivExpr(const FoldingSetNodeIDRef ID, const SCEV *lhs, const SCEV *rhs) : SCEV(ID, scUDivExpr), LHS(lhs), RHS(rhs) {} public: @@ -389,10 +390,10 @@ namespace llvm { const Loop *L; - SCEVAddRecExpr(const FoldingSetNodeID &ID, - const SmallVectorImpl &ops, const Loop *l) - : SCEVNAryExpr(ID, scAddRecExpr, ops), L(l) { - for (size_t i = 0, e = Operands.size(); i != e; ++i) + SCEVAddRecExpr(const FoldingSetNodeIDRef ID, + const SCEV *const *O, size_t N, const Loop *l) + : SCEVNAryExpr(ID, scAddRecExpr, O, N), L(l) { + for (size_t i = 0, e = NumOperands; i != e; ++i) assert(Operands[i]->isLoopInvariant(l) && "Operands of AddRec must be loop-invariant!"); } @@ -471,9 +472,9 @@ namespace llvm { class SCEVSMaxExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; - SCEVSMaxExpr(const FoldingSetNodeID &ID, - const SmallVectorImpl &ops) - : SCEVCommutativeExpr(ID, scSMaxExpr, ops) { + SCEVSMaxExpr(const FoldingSetNodeIDRef ID, + const SCEV *const *O, size_t N) + : SCEVCommutativeExpr(ID, scSMaxExpr, O, N) { // Max never overflows. 
setHasNoUnsignedWrap(true); setHasNoSignedWrap(true); @@ -496,9 +497,9 @@ namespace llvm { class SCEVUMaxExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; - SCEVUMaxExpr(const FoldingSetNodeID &ID, - const SmallVectorImpl &ops) - : SCEVCommutativeExpr(ID, scUMaxExpr, ops) { + SCEVUMaxExpr(const FoldingSetNodeIDRef ID, + const SCEV *const *O, size_t N) + : SCEVCommutativeExpr(ID, scUMaxExpr, O, N) { // Max never overflows. setHasNoUnsignedWrap(true); setHasNoSignedWrap(true); @@ -523,7 +524,7 @@ namespace llvm { friend class ScalarEvolution; Value *V; - SCEVUnknown(const FoldingSetNodeID &ID, Value *v) : + SCEVUnknown(const FoldingSetNodeIDRef ID, Value *v) : SCEV(ID, scUnknown), V(v) {} public: diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h index b8d04bf2132e..1b6ab2cd4cbc 100644 --- a/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -83,9 +83,9 @@ public: /// getEntryAlignment - Return the alignment of each entry in the jump table. unsigned getEntryAlignment(const TargetData &TD) const; - /// getJumpTableIndex - Create a new jump table or return an existing one. + /// createJumpTableIndex - Create a new jump table. /// - unsigned getJumpTableIndex(const std::vector &DestBBs); + unsigned createJumpTableIndex(const std::vector &DestBBs); /// isEmpty - Return true if there are no jump tables. /// diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h index 80b7ca4f82df..b1f199604589 100644 --- a/include/llvm/Instructions.h +++ b/include/llvm/Instructions.h @@ -2516,6 +2516,11 @@ public: const Value *getCalledValue() const { return getOperand(0); } Value *getCalledValue() { return getOperand(0); } + /// setCalledFunction - Set the function called. + void setCalledFunction(Value* Fn) { + Op<0>() = Fn; + } + // get*Dest - Return the destination basic blocks... BasicBlock *getNormalDest() const { return cast(getOperand(1)); diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 50ee3582b721..67abd955a1ae 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -892,7 +892,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse42_crc32_32 : GCCBuiltin<"__builtin_ia32_crc32si">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_sse42_crc32_64 : GCCBuiltin<"__builtin_ia32_crc32di">, + def int_x86_sse42_crc64_8 : + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_crc64_64 : GCCBuiltin<"__builtin_ia32_crc32di">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; } diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h index 1d8051fdb060..363c7d977eba 100644 --- a/include/llvm/MC/MCAssembler.h +++ b/include/llvm/MC/MCAssembler.h @@ -24,8 +24,10 @@ class raw_ostream; class MCAsmLayout; class MCAssembler; class MCContext; +class MCCodeEmitter; class MCExpr; class MCFragment; +class MCObjectWriter; class MCSection; class MCSectionData; class MCSymbol; @@ -35,7 +37,8 @@ class TargetAsmBackend; /// MCAsmFixup - Represent a fixed size region of bytes inside some fragment /// which needs to be rewritten. This region will either be rewritten by the /// assembler or cause a relocation entry to be generated. -struct MCAsmFixup { +class MCAsmFixup { +public: /// Offset - The offset inside the fragment which needs to be rewritten. 
uint64_t Offset; @@ -45,14 +48,9 @@ struct MCAsmFixup { /// Kind - The fixup kind. MCFixupKind Kind; - /// FixedValue - The value to replace the fix up by. - // - // FIXME: This should not be here. - uint64_t FixedValue; - public: MCAsmFixup(uint64_t _Offset, const MCExpr &_Value, MCFixupKind _Kind) - : Offset(_Offset), Value(&_Value), Kind(_Kind), FixedValue(0) {} + : Offset(_Offset), Value(&_Value), Kind(_Kind) {} }; class MCFragment : public ilist_node { @@ -590,6 +588,8 @@ public: typedef SymbolDataListType::const_iterator const_symbol_iterator; typedef SymbolDataListType::iterator symbol_iterator; + typedef std::vector::const_iterator + const_indirect_symbol_iterator; typedef std::vector::iterator indirect_symbol_iterator; private: @@ -600,6 +600,8 @@ private: TargetAsmBackend &Backend; + MCCodeEmitter &Emitter; + raw_ostream &OS; iplist Sections; @@ -621,21 +623,6 @@ private: unsigned SubsectionsViaSymbols : 1; private: - /// Check whether a fixup can be satisfied, or whether it needs to be relaxed - /// (increased in size, in order to hold its value correctly). - bool FixupNeedsRelaxation(MCAsmFixup &Fixup, MCDataFragment *DF); - - /// LayoutSection - Assign offsets and sizes to the fragments in the section - /// \arg SD, and update the section size. The section file offset should - /// already have been computed. - void LayoutSection(MCSectionData &SD); - - /// LayoutOnce - Perform one layout iteration and return true if any offsets - /// were adjusted. - bool LayoutOnce(); - - // FIXME: Make protected once we factor out object writer classes. -public: /// Evaluate a fixup to a relocatable expression and the value which should be /// placed into the fixup. /// @@ -653,6 +640,44 @@ public: MCAsmFixup &Fixup, MCDataFragment *DF, MCValue &Target, uint64_t &Value) const; + /// Check whether a fixup can be satisfied, or whether it needs to be relaxed + /// (increased in size, in order to hold its value correctly). + bool FixupNeedsRelaxation(MCAsmFixup &Fixup, MCDataFragment *DF); + + /// LayoutSection - Assign offsets and sizes to the fragments in the section + /// \arg SD, and update the section size. The section file offset should + /// already have been computed. + void LayoutSection(MCSectionData &SD); + + /// LayoutOnce - Perform one layout iteration and return true if any offsets + /// were adjusted. + bool LayoutOnce(); + +public: + /// Find the symbol which defines the atom containing given address, inside + /// the given section, or null if there is no such symbol. + // + // FIXME: Eliminate this, it is very slow. + const MCSymbolData *getAtomForAddress(const MCSectionData *Section, + uint64_t Address) const; + + /// Find the symbol which defines the atom containing the given symbol, or + /// null if there is no such symbol. + // + // FIXME: Eliminate this, it is very slow. + const MCSymbolData *getAtom(const MCSymbolData *Symbol) const; + + /// Check whether a particular symbol is visible to the linker and is required + /// in the symbol table, or whether it can be discarded by the assembler. This + /// also effects whether the assembler treats the label as potentially + /// defining a separate atom. + bool isSymbolLinkerVisible(const MCSymbolData *SD) const; + + /// Emit the section contents using the given object writer. + // + // FIXME: Should MCAssembler always have a reference to the object writer? + void WriteSectionData(const MCSectionData *Section, MCObjectWriter *OW) const; + public: /// Construct a new assembler instance. 
/// @@ -662,13 +687,16 @@ public: // concrete and require clients to pass in a target like object. The other // option is to make this abstract, and have targets provide concrete // implementations as we do with AsmParser. - MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend, raw_ostream &OS); + MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend, + MCCodeEmitter &_Emitter, raw_ostream &OS); ~MCAssembler(); MCContext &getContext() const { return Context; } TargetAsmBackend &getBackend() const { return Backend; } + MCCodeEmitter &getEmitter() const { return Emitter; } + /// Finish - Do final processing and write the object to the output stream. void Finish(); @@ -723,10 +751,16 @@ public: indirect_symbol_iterator indirect_symbol_begin() { return IndirectSymbols.begin(); } + const_indirect_symbol_iterator indirect_symbol_begin() const { + return IndirectSymbols.begin(); + } indirect_symbol_iterator indirect_symbol_end() { return IndirectSymbols.end(); } + const_indirect_symbol_iterator indirect_symbol_end() const { + return IndirectSymbols.end(); + } size_t indirect_symbol_size() const { return IndirectSymbols.size(); } diff --git a/include/llvm/MC/MCCodeEmitter.h b/include/llvm/MC/MCCodeEmitter.h index fe1aff4f8c92..010a2e556629 100644 --- a/include/llvm/MC/MCCodeEmitter.h +++ b/include/llvm/MC/MCCodeEmitter.h @@ -22,6 +22,12 @@ template class SmallVectorImpl; /// MCFixupKindInfo - Target independent information on a fixup kind. struct MCFixupKindInfo { + enum FixupKindFlags { + /// Is this fixup kind PCrelative. This is used by the assembler backend to + /// evaluate fixup values in a target independent manner when possible. + FKF_IsPCRel = (1 << 0) + }; + /// A target specific name for the fixup kind. The names will be unique for /// distinct kinds on any given target. const char *Name; @@ -36,6 +42,9 @@ struct MCFixupKindInfo { /// The number of bits written by this fixup. The bits are assumed to be /// contiguous. unsigned TargetSize; + + /// Flags describing additional information on this fixup kind. + unsigned Flags; }; /// MCCodeEmitter - Generic instruction encoding interface. diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 85114e30995e..c5814b377eb8 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -73,9 +73,10 @@ namespace llvm { /// one if it does. /// /// @param Name - The symbol name, for debugging purposes only, temporary - /// symbols do not surive assembly. If non-empty the name must be unique - /// across all symbols. - MCSymbol *GetOrCreateTemporarySymbol(StringRef Name = ""); + /// symbols do not surive assembly. + MCSymbol *GetOrCreateTemporarySymbol(StringRef Name) { + return GetOrCreateSymbol(Name, true); + } MCSymbol *GetOrCreateTemporarySymbol(const Twine &Name); /// LookupSymbol - Get the symbol for \p Name, or null. diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h new file mode 100644 index 000000000000..d4fab0ea477f --- /dev/null +++ b/include/llvm/MC/MCObjectWriter.h @@ -0,0 +1,162 @@ +//===-- llvm/MC/MCObjectWriter.h - Object File Writer Interface -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCOBJECTWRITER_H +#define LLVM_MC_MCOBJECTWRITER_H + +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/DataTypes.h" +#include + +namespace llvm { +class MCAsmFixup; +class MCAssembler; +class MCDataFragment; +class MCValue; +class raw_ostream; + +/// MCObjectWriter - Defines the object file and target independent interfaces +/// used by the assembler backend to write native file format object files. +/// +/// The object writer contains a few callbacks used by the assembler to allow +/// the object writer to modify the assembler data structures at appropriate +/// points. Once assembly is complete, the object writer is given the +/// MCAssembler instance, which contains all the symbol and section data which +/// should be emitted as part of WriteObject(). +/// +/// The object writer also contains a number of helper methods for writing +/// binary data to the output stream. +class MCObjectWriter { + MCObjectWriter(const MCObjectWriter &); // DO NOT IMPLEMENT + void operator=(const MCObjectWriter &); // DO NOT IMPLEMENT + +protected: + raw_ostream &OS; + + unsigned IsLittleEndian : 1; + +protected: // Can only create subclasses. + MCObjectWriter(raw_ostream &_OS, bool _IsLittleEndian) + : OS(_OS), IsLittleEndian(_IsLittleEndian) {} + +public: + virtual ~MCObjectWriter(); + + bool isLittleEndian() { return IsLittleEndian; } + + raw_ostream &getStream() { return OS; } + + /// @name High-Level API + /// @{ + + /// Perform any late binding of symbols (for example, to assign symbol indices + /// for use when generating relocations). + /// + /// This routine is called by the assembler after layout and relaxation is + /// complete. + virtual void ExecutePostLayoutBinding(MCAssembler &Asm) = 0; + + /// Record a relocation entry. + /// + /// This routine is called by the assembler after layout and relaxation, and + /// post layout binding. The implementation is responsible for storing + /// information about the relocation so that it can be emitted during + /// WriteObject(). + virtual void RecordRelocation(const MCAssembler &Asm, + const MCDataFragment &Fragment, + const MCAsmFixup &Fixup, MCValue Target, + uint64_t &FixedValue) = 0; + + /// Write the object file. + /// + /// This routine is called by the assembler after layout and relaxation is + /// complete, fixups have been evaluate and applied, and relocations + /// generated. 
+ virtual void WriteObject(const MCAssembler &Asm) = 0; + + /// @} + /// @name Binary Output + /// @{ + + void Write8(uint8_t Value) { + OS << char(Value); + } + + void WriteLE16(uint16_t Value) { + Write8(uint8_t(Value >> 0)); + Write8(uint8_t(Value >> 8)); + } + + void WriteLE32(uint32_t Value) { + WriteLE16(uint16_t(Value >> 0)); + WriteLE16(uint16_t(Value >> 16)); + } + + void WriteLE64(uint64_t Value) { + WriteLE32(uint32_t(Value >> 0)); + WriteLE32(uint32_t(Value >> 32)); + } + + void WriteBE16(uint16_t Value) { + Write8(uint8_t(Value >> 8)); + Write8(uint8_t(Value >> 0)); + } + + void WriteBE32(uint32_t Value) { + WriteBE16(uint16_t(Value >> 16)); + WriteBE16(uint16_t(Value >> 0)); + } + + void WriteBE64(uint64_t Value) { + WriteBE32(uint32_t(Value >> 32)); + WriteBE32(uint32_t(Value >> 0)); + } + + void Write16(uint16_t Value) { + if (IsLittleEndian) + WriteLE16(Value); + else + WriteBE16(Value); + } + + void Write32(uint32_t Value) { + if (IsLittleEndian) + WriteLE32(Value); + else + WriteBE32(Value); + } + + void Write64(uint64_t Value) { + if (IsLittleEndian) + WriteLE64(Value); + else + WriteBE64(Value); + } + + void WriteZeros(unsigned N) { + const char Zeros[16] = { 0 }; + + for (unsigned i = 0, e = N / 16; i != e; ++i) + OS << StringRef(Zeros, 16); + + OS << StringRef(Zeros, N % 16); + } + + void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) { + OS << Str; + if (ZeroFillSize) + WriteZeros(ZeroFillSize - Str.size()); + } + + /// @} +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 47befcaf6b0d..4b088a57d748 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -291,7 +291,8 @@ class TargetAsmBackend; /// assembler. /// /// \param InstPrint - If given, the instruction printer to use. If not given - /// the MCInst representation will be printed. + /// the MCInst representation will be printed. This method takes ownership of + /// InstPrint. /// /// \param CE - If given, a code emitter to use to show the instruction /// encoding inline with the assembly. diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h index e41eb2ab8859..fb96506e4401 100644 --- a/include/llvm/MC/MCSymbol.h +++ b/include/llvm/MC/MCSymbol.h @@ -28,8 +28,7 @@ namespace llvm { /// /// If the symbol is defined/emitted into the current translation unit, the /// Section member is set to indicate what section it lives in. Otherwise, if - /// it is a reference to an external entity, it has a null section. - /// + /// it is a reference to an external entity, it has a null section. class MCSymbol { // Special sentinal value for the absolute pseudo section. // @@ -52,7 +51,7 @@ namespace llvm { /// typically does not survive in the .o file's symbol table. Usually /// "Lfoo" or ".foo". unsigned IsTemporary : 1; - + private: // MCContext creates and uniques these. friend class MCContext; MCSymbol(StringRef name, bool isTemporary) @@ -83,6 +82,12 @@ namespace llvm { return Section != 0; } + /// isInSection - Check if this symbol is defined in some section (i.e., it + /// is defined but not absolute). + bool isInSection() const { + return isDefined() && !isAbsolute(); + } + /// isUndefined - Check if this symbol undefined (i.e., implicitly defined). bool isUndefined() const { return !isDefined(); @@ -96,7 +101,7 @@ namespace llvm { /// getSection - Get the section associated with a defined, non-absolute /// symbol. 
const MCSection &getSection() const { - assert(!isUndefined() && !isAbsolute() && "Invalid accessor!"); + assert(isInSection() && "Invalid accessor!"); return *Section; } diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h index 8aa73f350f0c..11b6c2a17b70 100644 --- a/include/llvm/MC/MCValue.h +++ b/include/llvm/MC/MCValue.h @@ -19,8 +19,9 @@ #include namespace llvm { -class MCSymbol; class MCAsmInfo; +class MCSymbol; +class MCSymbolRefExpr; class raw_ostream; /// MCValue - This represents an "assembler immediate". In its most general @@ -34,13 +35,13 @@ class raw_ostream; /// Note that this class must remain a simple POD value class, because we need /// it to live in unions etc. class MCValue { - const MCSymbol *SymA, *SymB; + const MCSymbolRefExpr *SymA, *SymB; int64_t Cst; public: int64_t getConstant() const { return Cst; } - const MCSymbol *getSymA() const { return SymA; } - const MCSymbol *getSymB() const { return SymB; } + const MCSymbolRefExpr *getSymA() const { return SymA; } + const MCSymbolRefExpr *getSymB() const { return SymB; } /// isAbsolute - Is this an absolute (as opposed to relocatable) value. bool isAbsolute() const { return !SymA && !SymB; } @@ -57,11 +58,11 @@ public: /// print - Print the value to the stream \arg OS. void print(raw_ostream &OS, const MCAsmInfo *MAI) const; - + /// dump - Print the value to stderr. void dump() const; - static MCValue get(const MCSymbol *SymA, const MCSymbol *SymB = 0, + static MCValue get(const MCSymbolRefExpr *SymA, const MCSymbolRefExpr *SymB=0, int64_t Val = 0) { MCValue R; assert((!SymB || SymA) && "Invalid relocatable MCValue!"); @@ -70,7 +71,7 @@ public: R.SymB = SymB; return R; } - + static MCValue get(int64_t Val) { MCValue R; R.Cst = Val; @@ -78,7 +79,7 @@ public: R.SymB = 0; return R; } - + }; } // end namespace llvm diff --git a/include/llvm/MC/MachObjectWriter.h b/include/llvm/MC/MachObjectWriter.h new file mode 100644 index 000000000000..3e3305f083a2 --- /dev/null +++ b/include/llvm/MC/MachObjectWriter.h @@ -0,0 +1,43 @@ +//===-- llvm/MC/MachObjectWriter.h - Mach-O File Writer ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MACHOBJECTWRITER_H +#define LLVM_MC_MACHOBJECTWRITER_H + +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { +class MCAsmFixup; +class MCAssembler; +class MCDataFragment; +class MCValue; +class raw_ostream; + +class MachObjectWriter : public MCObjectWriter { + void *Impl; + +public: + MachObjectWriter(raw_ostream &OS, bool Is64Bit, bool IsLittleEndian = true); + virtual ~MachObjectWriter(); + + virtual void ExecutePostLayoutBinding(MCAssembler &Asm); + + virtual void RecordRelocation(const MCAssembler &Asm, + const MCDataFragment &Fragment, + const MCAsmFixup &Fixup, MCValue Target, + uint64_t &FixedValue); + + virtual void WriteObject(const MCAssembler &Asm); +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h index b0ed33d6ed5a..b1f59dc05e7b 100644 --- a/include/llvm/Support/Allocator.h +++ b/include/llvm/Support/Allocator.h @@ -15,9 +15,12 @@ #define LLVM_SUPPORT_ALLOCATOR_H #include "llvm/Support/AlignOf.h" +#include "llvm/Support/MathExtras.h" #include "llvm/System/DataTypes.h" +#include #include #include +#include namespace llvm { @@ -175,4 +178,22 @@ public: } // end namespace llvm +inline void *operator new(size_t Size, llvm::BumpPtrAllocator &Allocator) { + struct S { + char c; +#ifdef __GNUC__ + char x __attribute__((aligned)); +#else + union { + double D; + long double LD; + long long L; + void *P; + } x; +#endif + }; + return Allocator.Allocate(Size, std::min((size_t)llvm::NextPowerOf2(Size), + offsetof(S, x))); +} + #endif // LLVM_SUPPORT_ALLOCATOR_H diff --git a/include/llvm/Support/RecyclingAllocator.h b/include/llvm/Support/RecyclingAllocator.h index 609193ffd762..49f77537188d 100644 --- a/include/llvm/Support/RecyclingAllocator.h +++ b/include/llvm/Support/RecyclingAllocator.h @@ -56,4 +56,11 @@ public: } +template +inline void *operator new(size_t, + llvm::RecyclingAllocator &Allocator) { + return Allocator.Allocate(); +} + #endif diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index 0cffffb88af1..0a7f54937705 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -389,66 +389,66 @@ class InstrInfo { // Standard Pseudo Instructions. 
let isCodeGenOnly = 1 in { def PHI : Instruction { - let OutOperandList = (ops); - let InOperandList = (ops variable_ops); + let OutOperandList = (outs); + let InOperandList = (ins variable_ops); let AsmString = "PHINODE"; let Namespace = "TargetOpcode"; } def INLINEASM : Instruction { - let OutOperandList = (ops); - let InOperandList = (ops variable_ops); + let OutOperandList = (outs); + let InOperandList = (ins variable_ops); let AsmString = ""; let Namespace = "TargetOpcode"; } def DBG_LABEL : Instruction { - let OutOperandList = (ops); - let InOperandList = (ops i32imm:$id); + let OutOperandList = (outs); + let InOperandList = (ins i32imm:$id); let AsmString = ""; let Namespace = "TargetOpcode"; let hasCtrlDep = 1; let isNotDuplicable = 1; } def EH_LABEL : Instruction { - let OutOperandList = (ops); - let InOperandList = (ops i32imm:$id); + let OutOperandList = (outs); + let InOperandList = (ins i32imm:$id); let AsmString = ""; let Namespace = "TargetOpcode"; let hasCtrlDep = 1; let isNotDuplicable = 1; } def GC_LABEL : Instruction { - let OutOperandList = (ops); - let InOperandList = (ops i32imm:$id); + let OutOperandList = (outs); + let InOperandList = (ins i32imm:$id); let AsmString = ""; let Namespace = "TargetOpcode"; let hasCtrlDep = 1; let isNotDuplicable = 1; } def KILL : Instruction { - let OutOperandList = (ops); - let InOperandList = (ops variable_ops); + let OutOperandList = (outs); + let InOperandList = (ins variable_ops); let AsmString = ""; let Namespace = "TargetOpcode"; let neverHasSideEffects = 1; } def EXTRACT_SUBREG : Instruction { - let OutOperandList = (ops unknown:$dst); - let InOperandList = (ops unknown:$supersrc, i32imm:$subidx); + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$supersrc, i32imm:$subidx); let AsmString = ""; let Namespace = "TargetOpcode"; let neverHasSideEffects = 1; } def INSERT_SUBREG : Instruction { - let OutOperandList = (ops unknown:$dst); - let InOperandList = (ops unknown:$supersrc, unknown:$subsrc, i32imm:$subidx); + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$supersrc, unknown:$subsrc, i32imm:$subidx); let AsmString = ""; let Namespace = "TargetOpcode"; let neverHasSideEffects = 1; let Constraints = "$supersrc = $dst"; } def IMPLICIT_DEF : Instruction { - let OutOperandList = (ops unknown:$dst); - let InOperandList = (ops); + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins); let AsmString = ""; let Namespace = "TargetOpcode"; let neverHasSideEffects = 1; @@ -456,23 +456,23 @@ def IMPLICIT_DEF : Instruction { let isAsCheapAsAMove = 1; } def SUBREG_TO_REG : Instruction { - let OutOperandList = (ops unknown:$dst); - let InOperandList = (ops unknown:$implsrc, unknown:$subsrc, i32imm:$subidx); + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$implsrc, unknown:$subsrc, i32imm:$subidx); let AsmString = ""; let Namespace = "TargetOpcode"; let neverHasSideEffects = 1; } def COPY_TO_REGCLASS : Instruction { - let OutOperandList = (ops unknown:$dst); - let InOperandList = (ops unknown:$src, i32imm:$regclass); + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$src, i32imm:$regclass); let AsmString = ""; let Namespace = "TargetOpcode"; let neverHasSideEffects = 1; let isAsCheapAsAMove = 1; } def DBG_VALUE : Instruction { - let OutOperandList = (ops); - let InOperandList = (ops variable_ops); + let OutOperandList = (outs); + let InOperandList = (ins variable_ops); let AsmString = "DBG_VALUE"; let Namespace 
= "TargetOpcode"; let isAsCheapAsAMove = 1; @@ -491,6 +491,11 @@ class AsmParser { // class. Generated AsmParser classes are always prefixed with the target // name. string AsmParserClassName = "AsmParser"; + + // AsmParserInstCleanup - If non-empty, this is the name of a custom function on the + // AsmParser class to call on every matched instruction. This can be used to + // perform target specific instruction post-processing. + string AsmParserInstCleanup = ""; // Variant - AsmParsers can be of multiple different variants. Variants are // used to support targets that need to parser multiple formats for the diff --git a/include/llvm/Target/TargetAsmBackend.h b/include/llvm/Target/TargetAsmBackend.h index 35a995f541e2..bb501cc5226b 100644 --- a/include/llvm/Target/TargetAsmBackend.h +++ b/include/llvm/Target/TargetAsmBackend.h @@ -10,9 +10,15 @@ #ifndef LLVM_TARGET_TARGETASMBACKEND_H #define LLVM_TARGET_TARGETASMBACKEND_H +#include "llvm/System/DataTypes.h" + namespace llvm { +class MCAsmFixup; +class MCDataFragment; +class MCObjectWriter; class MCSection; class Target; +class raw_ostream; /// TargetAsmBackend - Generic interface to target specific assembler backends. class TargetAsmBackend { @@ -24,11 +30,19 @@ protected: // Can only create subclasses. /// TheTarget - The Target that this machine was created for. const Target &TheTarget; + unsigned HasAbsolutizedSet : 1; + unsigned HasReliableSymbolDifference : 1; + unsigned HasScatteredSymbols : 1; + public: virtual ~TargetAsmBackend(); const Target &getTarget() const { return TheTarget; } + /// createObjectWriter - Create a new MCObjectWriter instance for use by the + /// assembler backend to emit the final object file. + virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const = 0; + /// hasAbsolutizedSet - Check whether this target "absolutizes" /// assignments. That is, given code like: /// a: @@ -40,7 +54,21 @@ public: /// value of L0 - L1. This distinction is only relevant for platforms that /// support scattered symbols, since in the absence of scattered symbols (a - /// b) cannot change after assembly. - virtual bool hasAbsolutizedSet() const { return false; } + bool hasAbsolutizedSet() const { return HasAbsolutizedSet; } + + /// hasReliableSymbolDifference - Check whether this target implements + /// accurate relocations for differences between symbols. If not, differences + /// between symbols will always be relocatable expressions and any references + /// to temporary symbols will be assumed to be in the same atom, unless they + /// reside in a different section. + /// + /// This should always be true (since it results in fewer relocations with no + /// loss of functionality), but is currently supported as a way to maintain + /// exact object compatibility with Darwin 'as' (on non-x86_64). It should + /// eventually should be eliminated. See also \see hasAbsolutizedSet. + bool hasReliableSymbolDifference() const { + return HasReliableSymbolDifference; + } /// hasScatteredSymbols - Check whether this target supports scattered /// symbols. If so, the assembler should assume that atoms can be scattered by @@ -50,13 +78,23 @@ public: /// /// Note that the assembler currently does not reason about atoms, instead it /// assumes all temporary symbols reside in the "current atom". 
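  // A hedged sketch (the class name, flag values, and base-class constructor
  // signature are illustrative assumptions, not part of this patch): with the
  // accessors now non-virtual, a concrete backend is expected to initialize
  // the protected flag bits itself, typically in its constructor.
  //
  //   class MyMachOAsmBackend : public TargetAsmBackend {
  //   public:
  //     MyMachOAsmBackend(const Target &T) : TargetAsmBackend(T) {
  //       HasAbsolutizedSet = true;
  //       HasScatteredSymbols = true;
  //       HasReliableSymbolDifference = false;
  //     }
  //     // ... plus createObjectWriter, isVirtualSection and ApplyFixup ...
  //   };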
- virtual bool hasScatteredSymbols() const { return false; } + bool hasScatteredSymbols() const { return HasScatteredSymbols; } /// doesSectionRequireSymbols - Check whether the given section requires that /// all symbols (even temporaries) have symbol table entries. virtual bool doesSectionRequireSymbols(const MCSection &Section) const { return false; } + + /// isVirtualSection - Check whether the given section is "virtual", that is + /// has no actual object file contents. + virtual bool isVirtualSection(const MCSection &Section) const = 0; + + /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided + /// data fragment, at the offset specified by the fixup and following the + /// fixup kind as appropriate. + virtual void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &Fragment, + uint64_t Value) const = 0; }; } // End llvm namespace diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index b19c20af346f..da0f68606c6f 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -307,7 +307,7 @@ public: /// intrinsic will need to map to a MemIntrinsicNode (touches memory). If /// this is the case, it returns true and store the intrinsic /// information into the IntrinsicInfo that was passed to the function. - typedef struct IntrinsicInfo { + struct IntrinsicInfo { unsigned opc; // target opcode EVT memVT; // memory VT const Value* ptrVal; // value representing memory location @@ -316,9 +316,9 @@ public: bool vol; // is volatile? bool readMem; // reads memory? bool writeMem; // writes memory? - } IntrinisicInfo; + }; - virtual bool getTgtMemIntrinsic(IntrinsicInfo& Info, + virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, CallInst &I, unsigned Intrinsic) { return false; } diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index b63c2bf0416a..a01a67f14dae 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -144,11 +144,6 @@ namespace llvm { /// wth earlier copy coalescing. extern bool StrongPHIElim; - /// DisableScheduling - This flag disables instruction scheduling. In - /// particular, it assigns an ordering to the SDNodes, which the scheduler - /// uses instead of its normal heuristics to perform scheduling. - extern bool DisableScheduling; - } // End llvm namespace #endif diff --git a/include/llvm/Target/TargetRegistry.h b/include/llvm/Target/TargetRegistry.h index 43738638cf8e..6b6dad8185b0 100644 --- a/include/llvm/Target/TargetRegistry.h +++ b/include/llvm/Target/TargetRegistry.h @@ -55,7 +55,7 @@ namespace llvm { typedef unsigned (*TripleMatchQualityFnTy)(const std::string &TT); - typedef const MCAsmInfo *(*AsmInfoCtorFnTy)(const Target &T, + typedef MCAsmInfo *(*AsmInfoCtorFnTy)(const Target &T, StringRef TT); typedef TargetMachine *(*TargetMachineCtorTy)(const Target &T, const std::string &TT, @@ -68,7 +68,7 @@ namespace llvm { typedef TargetAsmLexer *(*AsmLexerCtorTy)(const Target &T, const MCAsmInfo &MAI); typedef TargetAsmParser *(*AsmParserCtorTy)(const Target &T,MCAsmParser &P); - typedef const MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T); + typedef MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T); typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, @@ -184,7 +184,7 @@ namespace llvm { /// feature set; it should always be provided. 
Generally this should be /// either the target triple from the module, or the target triple of the /// host if that does not exist. - const MCAsmInfo *createAsmInfo(StringRef Triple) const { + MCAsmInfo *createAsmInfo(StringRef Triple) const { if (!AsmInfoCtorFn) return 0; return AsmInfoCtorFn(*this, Triple); @@ -241,7 +241,7 @@ namespace llvm { return AsmPrinterCtorFn(OS, TM, Streamer); } - const MCDisassembler *createMCDisassembler() const { + MCDisassembler *createMCDisassembler() const { if (!MCDisassemblerCtorFn) return 0; return MCDisassemblerCtorFn(*this); @@ -529,7 +529,7 @@ namespace llvm { TargetRegistry::RegisterAsmInfo(T, &Allocator); } private: - static const MCAsmInfo *Allocator(const Target &T, StringRef TT) { + static MCAsmInfo *Allocator(const Target &T, StringRef TT) { return new MCAsmInfoImpl(T, TT); } diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index c718c86e60ad..e56d886a0a1a 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -219,6 +219,7 @@ def SDNPMayStore : SDNodeProperty; // May write to memory, sets 'mayStore'. def SDNPMayLoad : SDNodeProperty; // May read memory, sets 'mayLoad'. def SDNPSideEffect : SDNodeProperty; // Sets 'HasUnmodelledSideEffects'. def SDNPMemOperand : SDNodeProperty; // Touches memory, has assoc MemOperand +def SDNPVariadic : SDNodeProperty; // Node has variable arguments. //===----------------------------------------------------------------------===// // Selection DAG Node definitions. diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 96bb02714a03..dda1fbad64f0 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -564,21 +564,6 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, unsigned BitWidth = TD->getTypeSizeInBits(TD->getIntPtrType(Ptr->getContext())); - APInt BasePtr(BitWidth, 0); - bool BaseIsInt = true; - if (!Ptr->isNullValue()) { - // If this is a inttoptr from a constant int, we can fold this as the base, - // otherwise we can't. - if (ConstantExpr *CE = dyn_cast(Ptr)) - if (CE->getOpcode() == Instruction::IntToPtr) - if (ConstantInt *Base = dyn_cast(CE->getOperand(0))) { - BasePtr = Base->getValue(); - BasePtr.zextOrTrunc(BitWidth); - } - - if (BasePtr == 0) - BaseIsInt = false; - } // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' @@ -615,7 +600,14 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, // If the base value for this address is a literal integer value, fold the // getelementptr to the resulting integer value casted to the pointer type. 
- if (BaseIsInt) { + APInt BasePtr(BitWidth, 0); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) + if (CE->getOpcode() == Instruction::IntToPtr) + if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) { + BasePtr = Base->getValue(); + BasePtr.zextOrTrunc(BitWidth); + } + if (Ptr->isNullValue() || BasePtr != 0) { Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr); return ConstantExpr::getIntToPtr(C, ResultTy); } @@ -1002,6 +994,8 @@ llvm::canConstantFoldCallTo(const Function *F) { case Intrinsic::usub_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::ssub_with_overflow: + case Intrinsic::convert_from_fp16: + case Intrinsic::convert_to_fp16: return true; default: return false; @@ -1082,6 +1076,15 @@ llvm::ConstantFoldCall(Function *F, const Type *Ty = F->getReturnType(); if (NumOperands == 1) { if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) { + if (Name == "llvm.convert.to.fp16") { + APFloat Val(Op->getValueAPF()); + + bool lost = false; + Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost); + + return ConstantInt::get(F->getContext(), Val.bitcastToAPInt()); + } + if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; /// Currently APFloat versions of these functions do not exist, so we use @@ -1166,6 +1169,20 @@ return ConstantInt::get(Ty, Op->getValue().countTrailingZeros()); else if (Name.startswith("llvm.ctlz")) return ConstantInt::get(Ty, Op->getValue().countLeadingZeros()); + else if (Name == "llvm.convert.from.fp16") { + APFloat Val(Op->getValue()); + + bool lost = false; + APFloat::opStatus status = + Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost); + + // Conversion is always precise. + status = status; + assert(status == APFloat::opOK && !lost && + "Precision lost during fp16 constfolding"); + + return ConstantFP::get(F->getContext(), Val); + } return 0; } diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp index bb4f46dff9af..e1019474cf43 100644 --- a/lib/Analysis/LoopDependenceAnalysis.cpp +++ b/lib/Analysis/LoopDependenceAnalysis.cpp @@ -119,8 +119,7 @@ bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A, P = Pairs.FindNodeOrInsertPos(id, insertPos); if (P) return true; - P = PairAllocator.Allocate<DependencePair>(); - new (P) DependencePair(id, A, B); + P = new (PairAllocator) DependencePair(id, A, B); Pairs.InsertNode(P, insertPos); return false; } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 15f072dbeb7e..1af271a93eff 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -141,7 +141,7 @@ bool SCEV::isAllOnesValue() const { } SCEVCouldNotCompute::SCEVCouldNotCompute() : - SCEV(FoldingSetNodeID(), scCouldNotCompute) {} + SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const { llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); @@ -177,8 +177,7 @@ const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { ID.AddPointer(V); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate<SCEVConstant>(); - new (S) SCEVConstant(ID, V); + SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -199,7 +198,7 @@ void SCEVConstant::print(raw_ostream &OS) const { WriteAsOperand(OS, V, false); } -SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeID &ID, 
unsigned SCEVTy, const SCEV *op, const Type *ty) : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} @@ -211,7 +210,7 @@ bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { return Op->properlyDominates(BB, DT); } -SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeID &ID, +SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scTruncate, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && @@ -223,7 +222,7 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const { OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; } -SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeID &ID, +SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scZeroExtend, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && @@ -235,7 +234,7 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const { OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; } -SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeID &ID, +SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scSignExtend, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && @@ -248,10 +247,10 @@ void SCEVSignExtendExpr::print(raw_ostream &OS) const { } void SCEVCommutativeExpr::print(raw_ostream &OS) const { - assert(Operands.size() > 1 && "This plus expr shouldn't exist!"); + assert(NumOperands > 1 && "This plus expr shouldn't exist!"); const char *OpStr = getOperationStr(); OS << "(" << *Operands[0]; - for (unsigned i = 1, e = Operands.size(); i != e; ++i) + for (unsigned i = 1, e = NumOperands; i != e; ++i) OS << OpStr << *Operands[i]; OS << ")"; } @@ -329,7 +328,7 @@ SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { void SCEVAddRecExpr::print(raw_ostream &OS) const { OS << "{" << *Operands[0]; - for (unsigned i = 1, e = Operands.size(); i != e; ++i) + for (unsigned i = 1, e = NumOperands; i != e; ++i) OS << ",+," << *Operands[i]; OS << "}<"; WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); @@ -846,8 +845,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVTruncateExpr(ID, Op, Ty); + SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), + Op, Ty); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -981,8 +980,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVZeroExtendExpr(ID, Op, Ty); + SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -1116,8 +1115,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. 
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVSignExtendExpr(ID, Op, Ty); + SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -1202,23 +1201,23 @@ static bool CollectAddOperandsWithScales(DenseMap &M, SmallVector &NewOps, APInt &AccumulatedConstant, - const SmallVectorImpl &Ops, + const SCEV *const *Ops, size_t NumOperands, const APInt &Scale, ScalarEvolution &SE) { bool Interesting = false; // Iterate over the add operands. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + for (unsigned i = 0, e = NumOperands; i != e; ++i) { const SCEVMulExpr *Mul = dyn_cast(Ops[i]); if (Mul && isa(Mul->getOperand(0))) { APInt NewScale = Scale * cast(Mul->getOperand(0))->getValue()->getValue(); if (Mul->getNumOperands() == 2 && isa(Mul->getOperand(1))) { // A multiplication of a constant with another add; recurse. + const SCEVAddExpr *Add = cast(Mul->getOperand(1)); Interesting |= CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, - cast(Mul->getOperand(1)) - ->getOperands(), + Add->op_begin(), Add->getNumOperands(), NewScale, SE); } else { // A multiplication of a constant with some other value. Update @@ -1427,7 +1426,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SmallVector NewOps; APInt AccumulatedConstant(BitWidth, 0); if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, - Ops, APInt(BitWidth, 1), *this)) { + Ops.data(), Ops.size(), + APInt(BitWidth, 1), *this)) { // Some interesting folding opportunity is present, so its worthwhile to // re-generate the operands list. Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. 
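The ScalarEvolution hunks above and below all make the same mechanical change: the two-step SCEVAllocator.Allocate<T>() / placement-new sequence becomes a single new (SCEVAllocator) T(...) expression, with the folding-set ID interned into the same allocator. That spelling only compiles because a placement operator new taking the bump-pointer allocator is in scope. The sketch below illustrates the idiom in isolation; Arena is a hypothetical stand-in for llvm::BumpPtrAllocator, not the actual LLVM implementation.

  #include <cstddef>
  #include <cstdlib>
  #include <new>

  // Hypothetical stand-in for llvm::BumpPtrAllocator: memory is handed out
  // bump-pointer style and reclaimed only when the arena itself is destroyed.
  class Arena {
    char *Buf;
    char *Cur;
  public:
    Arena() : Buf(static_cast<char *>(std::malloc(1 << 16))), Cur(Buf) {}
    ~Arena() { std::free(Buf); }
    void *Allocate(std::size_t N) {
      // Bump-pointer allocation kept 16-byte aligned; a real implementation
      // would grow additional slabs instead of assuming 64 KiB is enough.
      std::size_t Off =
          (static_cast<std::size_t>(Cur - Buf) + 15) & ~static_cast<std::size_t>(15);
      Cur = Buf + Off + N;
      return Buf + Off;
    }
  };

  // The placement operator new that makes 'new (A) T(args)' legal. This is the
  // hook the converted call sites rely on; the older '-' lines spell it as a
  // separate Allocate() call followed by an explicit placement new.
  inline void *operator new(std::size_t N, Arena &A) { return A.Allocate(N); }
  // Matching placement delete, used only if the constructor throws.
  inline void operator delete(void *, Arena &) {}

  // Minimal node type standing in for SCEVConstant, SDNode, and friends.
  struct Node {
    int Value;
    explicit Node(int V) : Value(V) {}
  };

  int main() {
    Arena A;
    Node *N = new (A) Node(42); // allocate from the arena and construct in place
    return (N->Value == 42) ? 0 : 1;
  }

Because the arena releases everything at once in its destructor, objects created this way are never individually deleted, which is why the converted call sites need no matching delete.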
@@ -1611,8 +1611,10 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SCEVAddExpr *S = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { - S = SCEVAllocator.Allocate(); - new (S) SCEVAddExpr(ID, Ops); + const SCEV **O = SCEVAllocator.Allocate(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); } if (HasNUW) S->setHasNoUnsignedWrap(true); @@ -1819,8 +1821,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, SCEVMulExpr *S = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { - S = SCEVAllocator.Allocate(); - new (S) SCEVMulExpr(ID, Ops); + const SCEV **O = SCEVAllocator.Allocate(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); } if (HasNUW) S->setHasNoUnsignedWrap(true); @@ -1880,9 +1884,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, const SCEV *Op = M->getOperand(i); const SCEV *Div = getUDivExpr(Op, RHSC); if (!isa(Div) && getMulExpr(Div, RHSC) == Op) { - const SmallVectorImpl &MOperands = M->getOperands(); - Operands = SmallVector(MOperands.begin(), - MOperands.end()); + Operands = SmallVector(M->op_begin(), M->op_end()); Operands[i] = Div; return getMulExpr(Operands); } @@ -1921,8 +1923,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, ID.AddPointer(RHS); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVUDivExpr(ID, LHS, RHS); + SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), + LHS, RHS); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -2030,8 +2032,10 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, SCEVAddRecExpr *S = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { - S = SCEVAllocator.Allocate(); - new (S) SCEVAddRecExpr(ID, Operands, L); + const SCEV **O = SCEVAllocator.Allocate(Operands.size()); + std::uninitialized_copy(Operands.begin(), Operands.end(), O); + S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), + O, Operands.size(), L); UniqueSCEVs.InsertNode(S, IP); } if (HasNUW) S->setHasNoUnsignedWrap(true); @@ -2130,8 +2134,10 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { ID.AddPointer(Ops[i]); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVSMaxExpr(ID, Ops); + const SCEV **O = SCEVAllocator.Allocate(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -2227,8 +2233,10 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { ID.AddPointer(Ops[i]); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVUMaxExpr(ID, Ops); + const SCEV **O = SCEVAllocator.Allocate(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -2290,8 +2298,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) { ID.AddPointer(V); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate(); - new 
(S) SCEVUnknown(ID, V); + SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V); UniqueSCEVs.InsertNode(S, IP); return S; } diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 3c2cbfbe78eb..138cdc64cd0c 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -232,9 +232,7 @@ static bool FactorOutConstant(const SCEV *&S, const SCEVConstant *FC = cast(Factor); if (const SCEVConstant *C = dyn_cast(M->getOperand(0))) if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { - const SmallVectorImpl &MOperands = M->getOperands(); - SmallVector NewMulOps(MOperands.begin(), - MOperands.end()); + SmallVector NewMulOps(M->op_begin(), M->op_end()); NewMulOps[0] = SE.getConstant(C->getValue()->getValue().sdiv( FC->getValue()->getValue())); @@ -249,9 +247,7 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *Remainder = SE.getIntegerSCEV(0, SOp->getType()); if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) && Remainder->isZero()) { - const SmallVectorImpl &MOperands = M->getOperands(); - SmallVector NewMulOps(MOperands.begin(), - MOperands.end()); + SmallVector NewMulOps(M->op_begin(), M->op_end()); NewMulOps[i] = SOp; S = SE.getMulExpr(NewMulOps); return true; @@ -297,13 +293,11 @@ static void SimplifyAddOperands(SmallVectorImpl &Ops, SE.getAddExpr(NoAddRecs); // If it returned an add, use the operands. Otherwise it simplified // the sum into a single value, so just use that. + Ops.clear(); if (const SCEVAddExpr *Add = dyn_cast(Sum)) - Ops = Add->getOperands(); - else { - Ops.clear(); - if (!Sum->isZero()) - Ops.push_back(Sum); - } + Ops.insert(Ops.end(), Add->op_begin(), Add->op_end()); + else if (!Sum->isZero()) + Ops.push_back(Sum); // Then append the addrecs. Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end()); } @@ -1060,10 +1054,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (CanonicalIV && SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty)) { - const SmallVectorImpl &Ops = S->getOperands(); - SmallVector NewOps(Ops.size()); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - NewOps[i] = SE.getAnyExtendExpr(Ops[i], CanonicalIV->getType()); + SmallVector NewOps(S->getNumOperands()); + for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) + NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop())); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); @@ -1078,8 +1071,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { - const SmallVectorImpl &SOperands = S->getOperands(); - SmallVector NewOps(SOperands.begin(), SOperands.end()); + SmallVector NewOps(S->op_begin(), S->op_end()); NewOps[0] = SE.getIntegerSCEV(0, Ty); const SCEV *Rest = SE.getAddRecExpr(NewOps, L); @@ -1248,6 +1240,15 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { return LHS; } +Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty, + Instruction *I) { + BasicBlock::iterator IP = I; + while (isInsertedInstruction(IP) || isa(IP)) + ++IP; + Builder.SetInsertPoint(IP->getParent(), IP); + return expandCodeFor(SH, Ty); +} + Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) { // Expand the code for this SCEV. 
Value *V = expand(SH); @@ -1286,9 +1287,7 @@ Value *SCEVExpander::expand(const SCEV *S) { // there) so that it is guaranteed to dominate any user inside the loop. if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop) InsertPt = L->getHeader()->getFirstNonPHI(); - while (isa(InsertPt)) - InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); - while (isInsertedInstruction(InsertPt)) + while (isInsertedInstruction(InsertPt) || isa(InsertPt)) InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); break; } @@ -1324,7 +1323,8 @@ void SCEVExpander::rememberInstruction(Value *I) { // subsequently inserted code will be dominated. if (Builder.GetInsertPoint() == I) { BasicBlock::iterator It = cast(I); - do { ++It; } while (isInsertedInstruction(It)); + do { ++It; } while (isInsertedInstruction(It) || + isa(It)); Builder.SetInsertPoint(Builder.GetInsertBlock(), It); } } @@ -1332,7 +1332,7 @@ void SCEVExpander::rememberInstruction(Value *I) { void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { // If we acquired more instructions since the old insert point was saved, // advance past them. - while (isInsertedInstruction(I)) ++I; + while (isInsertedInstruction(I) || isa(I)) ++I; Builder.SetInsertPoint(BB, I); } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index a32883720be3..b9453c90b583 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -293,6 +293,8 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() { } else if (ConstantStruct *UserCS = dyn_cast(UserC)) { NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(), UserCS->getType()->isPacked()); + } else if (ConstantUnion *UserCU = dyn_cast(UserC)) { + NewC = ConstantUnion::get(UserCU->getType(), NewOps[0]); } else if (isa(UserC)) { NewC = ConstantVector::get(&NewOps[0], NewOps.size()); } else { @@ -1015,6 +1017,11 @@ bool BitcodeReader::ParseConstants() { Elts.push_back(ValueList.getConstantFwdRef(Record[i], STy->getElementType(i))); V = ConstantStruct::get(STy, Elts); + } else if (const UnionType *UnTy = dyn_cast(CurTy)) { + uint64_t Index = Record[0]; + Constant *Val = ValueList.getConstantFwdRef(Record[1], + UnTy->getElementType(Index)); + V = ConstantUnion::get(UnTy, Val); } else if (const ArrayType *ATy = dyn_cast(CurTy)) { const Type *EltTy = ATy->getElementType(); for (unsigned i = 0; i != Size; ++i) diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 82e73b5cff25..3ab272666749 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -808,11 +808,25 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, else if (isCStr7) AbbrevToUse = CString7Abbrev; } else if (isa(C) || isa(V) || - isa(C) || isa(V)) { + isa(V)) { Code = bitc::CST_CODE_AGGREGATE; for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) Record.push_back(VE.getValueID(C->getOperand(i))); AbbrevToUse = AggregateAbbrev; + } else if (isa(C)) { + Code = bitc::CST_CODE_AGGREGATE; + + // Unions only have one entry but we must send type along with it. 
+ const Type *EntryKind = C->getOperand(0)->getType(); + + const UnionType *UnTy = cast(C->getType()); + int UnionIndex = UnTy->getElementTypeIndex(EntryKind); + assert(UnionIndex != -1 && "Constant union contains invalid entry"); + + Record.push_back(UnionIndex); + Record.push_back(VE.getValueID(C->getOperand(0))); + + AbbrevToUse = AggregateAbbrev; } else if (const ConstantExpr *CE = dyn_cast(C)) { switch (CE->getOpcode()) { default: diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 2636e2c4017f..1d4f7f7ae68d 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1138,6 +1138,21 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, "Layout of constant struct may be incorrect!"); } +static void EmitGlobalConstantUnion(const ConstantUnion *CU, + unsigned AddrSpace, AsmPrinter &AP) { + const TargetData *TD = AP.TM.getTargetData(); + unsigned Size = TD->getTypeAllocSize(CU->getType()); + + const Constant *Contents = CU->getOperand(0); + unsigned FilledSize = TD->getTypeAllocSize(Contents->getType()); + + // Print the actually filled part + AP.EmitGlobalConstant(Contents, AddrSpace); + + // And pad with enough zeroes + AP.OutStreamer.EmitZeros(Size-FilledSize, AddrSpace); +} + static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AsmPrinter &AP) { // FP Constants are printed as integer constants to avoid losing @@ -1257,9 +1272,6 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { if (const ConstantFP *CFP = dyn_cast(CV)) return EmitGlobalConstantFP(CFP, AddrSpace, *this); - - if (const ConstantVector *V = dyn_cast(CV)) - return EmitGlobalConstantVector(V, AddrSpace, *this); if (isa(CV)) { unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); @@ -1267,6 +1279,12 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { return; } + if (const ConstantUnion *CVU = dyn_cast(CV)) + return EmitGlobalConstantUnion(CVU, AddrSpace, *this); + + if (const ConstantVector *V = dyn_cast(CV)) + return EmitGlobalConstantVector(V, AddrSpace, *this); + // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. OutStreamer.EmitValue(LowerConstant(CV, *this), diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 866f457139ae..7153fe20dcf6 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2531,8 +2531,8 @@ void DwarfDebug::emitDebugInfo() { Asm->OutStreamer.AddComment("DWARF version number"); Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->OutStreamer.AddComment("Offset Into Abbrev. 
Section"); - EmitSectionOffset(getTempLabel("abbrev_begin"),getTempLabel("section_abbrev"), - true, false); + EmitSectionOffset(getTempLabel("abbrev_begin"),getTempLabel("section_abbrev"), + true); Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(TD->getPointerSize()); @@ -2842,8 +2842,8 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) { Asm->OutStreamer.EmitLabel(DebugFrameBegin); Asm->OutStreamer.AddComment("FDE CIE offset"); - EmitSectionOffset(getTempLabel("debug_frame_common"), - getTempLabel("section_debug_frame"), true, false); + EmitSectionOffset(getTempLabel("debug_frame_common"), + getTempLabel("section_debug_frame"), true); Asm->OutStreamer.AddComment("FDE initial location"); MCSymbol *FuncBeginSym = getDWLabel("func_begin", DebugFrameInfo.Number); @@ -2878,8 +2878,7 @@ void DwarfDebug::emitDebugPubNames() { Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); EmitSectionOffset(getDWLabel("info_begin", ModuleCU->getID()), - getTempLabel("section_info"), - true, false); + getTempLabel("section_info"), true); Asm->OutStreamer.AddComment("Compilation Unit Length"); EmitDifference(getDWLabel("info_end", ModuleCU->getID()), @@ -2920,7 +2919,7 @@ void DwarfDebug::emitDebugPubTypes() { Asm->OutStreamer.AddComment("Offset of Compilation ModuleCU Info"); EmitSectionOffset(getDWLabel("info_begin", ModuleCU->getID()), - getTempLabel("section_info"), true, false); + getTempLabel("section_info"), true); Asm->OutStreamer.AddComment("Compilation ModuleCU Length"); EmitDifference(getDWLabel("info_end", ModuleCU->getID()), @@ -3068,8 +3067,8 @@ void DwarfDebug::emitDebugInlineInfo() { getTempLabel("section_str"), true); Asm->OutStreamer.AddComment("Function name"); - EmitSectionOffset(getStringPoolEntry(Name), getTempLabel("section_str"), - false, true); + EmitSectionOffset(getStringPoolEntry(Name), getTempLabel("section_str"), + true); EmitULEB128(Labels.size(), "Inline count"); for (SmallVector::iterator LI = Labels.begin(), diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 13ae43daf6dd..151e9cd4403d 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -192,7 +192,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MadeChange |= MadeChangeThisIteration; } - // See if any jump tables have become mergable or dead as the code generator + // See if any jump tables have become dead as the code generator // did its thing. MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); if (JTI == 0) { @@ -200,27 +200,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, return MadeChange; } - const std::vector &JTs = JTI->getJumpTables(); - // Figure out how these jump tables should be merged. - std::vector JTMapping; - JTMapping.reserve(JTs.size()); - - // We always keep the 0th jump table. - JTMapping.push_back(0); - - // Scan the jump tables, seeing if there are any duplicates. Note that this - // is N^2, which should be fixed someday. - for (unsigned i = 1, e = JTs.size(); i != e; ++i) { - if (JTs[i].MBBs.empty()) - JTMapping.push_back(i); - else - JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); - } - - // If a jump table was merge with another one, walk the function rewriting - // references to jump tables to reference the new JT ID's. Keep track of - // whether we see a jump table idx, if not, we can delete the JT. - BitVector JTIsLive(JTs.size()); + // Walk the function to find jump tables that are live. 
+ BitVector JTIsLive(JTI->getJumpTables().size()); for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) { for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); @@ -228,17 +209,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { MachineOperand &Op = I->getOperand(op); if (!Op.isJTI()) continue; - unsigned NewIdx = JTMapping[Op.getIndex()]; - Op.setIndex(NewIdx); // Remember that this JT is live. - JTIsLive.set(NewIdx); + JTIsLive.set(Op.getIndex()); } } - // Finally, remove dead jump tables. This happens either because the - // indirect jump was unreachable (and thus deleted) or because the jump - // table was merged with some other one. + // Finally, remove dead jump tables. This happens when the + // indirect jump was unreachable (and thus deleted). for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) if (!JTIsLive.test(i)) { JTI->RemoveJumpTable(i); @@ -1143,22 +1121,6 @@ ReoptimizeBlock: !IsBetterFallthrough(PriorTBB, MBB)) DoTransform = false; - // We don't want to do this transformation if we have control flow like: - // br cond BB2 - // BB1: - // .. - // jmp BBX - // BB2: - // .. - // ret - // - // In this case, we could actually be moving the return block *into* a - // loop! - if (DoTransform && !MBB->succ_empty() && - (!PriorTBB->canFallThrough() || PriorTBB->empty())) - DoTransform = false; - - if (DoTransform) { // Reverse the branch so we will fall through on the previous true cond. SmallVector NewPriorCond(PriorCond); diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 1a23be0c7d11..6d7cc51547d2 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -332,7 +332,7 @@ void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL) const { - MCSymbol *Label = MBB.getParent()->getContext().GetOrCreateTemporarySymbol(); + MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); return Label; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index dbb5e1919177..b3e921689c7a 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -141,7 +141,7 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const { for (MachineBasicBlock::iterator mii = mbbi->begin(), mie = mbbi->end(); mii != mie; ++mii) { if (mii->isDebugValue()) - OS << SlotIndex::getEmptyKey() << '\t' << *mii; + OS << " \t" << *mii; else OS << getInstructionIndex(mii) << '\t' << *mii; } @@ -583,6 +583,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, // Look for kills, if it reaches a def before it's killed, then it shouldn't // be considered a livein. MachineBasicBlock::iterator mi = MBB->begin(); + MachineBasicBlock::iterator E = MBB->end(); + // Skip over DBG_VALUE at the start of the MBB. + if (mi != E && mi->isDebugValue()) { + while (++mi != E && mi->isDebugValue()) + ; + if (mi == E) + // MBB is empty except for DBG_VALUE's. 
+ return; + } + SlotIndex baseIndex = MIIdx; SlotIndex start = baseIndex; if (getInstructionFromIndex(baseIndex) == 0) @@ -591,12 +601,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, SlotIndex end = baseIndex; bool SeenDefUse = false; - MachineBasicBlock::iterator E = MBB->end(); while (mi != E) { - while (mi != E && mi->isDebugValue()) - ++mi; - if (mi == E) - break; if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); end = baseIndex.getDefIndex(); @@ -613,10 +618,11 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, break; } - ++mi; - if (mi != E && !mi->isDebugValue()) { + while (++mi != E && mi->isDebugValue()) + // Skip over DBG_VALUE. + ; + if (mi != E) baseIndex = indexes_->getNextNonNullIndex(baseIndex); - } } // Live-in register might not be used at all. diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 37f3d22630d4..5772b2f2d189 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -88,18 +88,15 @@ MachineFunction::MachineFunction(Function *F, const TargetMachine &TM, unsigned FunctionNum, MCContext &ctx) : Fn(F), Target(TM), Ctx(ctx) { if (TM.getRegisterInfo()) - RegInfo = new (Allocator.Allocate()) - MachineRegisterInfo(*TM.getRegisterInfo()); + RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo()); else RegInfo = 0; MFInfo = 0; - FrameInfo = new (Allocator.Allocate()) - MachineFrameInfo(*TM.getFrameInfo()); + FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameInfo()); if (Fn->hasFnAttr(Attribute::StackAlignment)) FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs( Fn->getAttributes().getFnAttributes())); - ConstantPool = new (Allocator.Allocate()) - MachineConstantPool(TM.getTargetData()); + ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData()); Alignment = TM.getTargetLowering()->getFunctionAlignment(F); FunctionNumber = FunctionNum; JumpTableInfo = 0; @@ -132,7 +129,7 @@ MachineJumpTableInfo *MachineFunction:: getOrCreateJumpTableInfo(unsigned EntryKind) { if (JumpTableInfo) return JumpTableInfo; - JumpTableInfo = new (Allocator.Allocate()) + JumpTableInfo = new (Allocator) MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind); return JumpTableInfo; } @@ -229,14 +226,13 @@ MachineMemOperand * MachineFunction::getMachineMemOperand(const Value *v, unsigned f, int64_t o, uint64_t s, unsigned base_alignment) { - return new (Allocator.Allocate()) - MachineMemOperand(v, f, o, s, base_alignment); + return new (Allocator) MachineMemOperand(v, f, o, s, base_alignment); } MachineMemOperand * MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size) { - return new (Allocator.Allocate()) + return new (Allocator) MachineMemOperand(MMO->getValue(), MMO->getFlags(), int64_t(uint64_t(MMO->getOffset()) + uint64_t(Offset)), @@ -600,17 +596,15 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const { return ~0; } -/// getJumpTableIndex - Create a new jump table entry in the jump table info -/// or return an existing one. +/// createJumpTableIndex - Create a new jump table entry in the jump table info. 
/// -unsigned MachineJumpTableInfo::getJumpTableIndex( +unsigned MachineJumpTableInfo::createJumpTableIndex( const std::vector &DestBBs) { assert(!DestBBs.empty() && "Cannot create an empty jump table!"); JumpTables.push_back(MachineJumpTableEntry(DestBBs)); return JumpTables.size()-1; } - /// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update /// the jump tables to branch to New instead. bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index 194fc14848bc..2c69065b1427 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -739,7 +739,7 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { // Physical registers and those that are not live-out of the block are // killed/dead at their last use/def within this block. - if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) + if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) { if (MO.isUse()) { // Don't mark uses that are tied to defs as kills. if (!MI->isRegTiedToDefOperand(idx)) @@ -747,6 +747,7 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { } else { MO.setIsDead(true); } + } } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index b5af2c134d3b..63ca8e67000e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -851,8 +851,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::MERGE_VALUES: case ISD::EH_RETURN: case ISD::FRAME_TO_ARGS_OFFSET: - case ISD::FP16_TO_FP32: - case ISD::FP32_TO_FP16: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8363c3af2121..ed5f24c1cc95 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2167,7 +2167,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector& LdChain, } else return LdOp; } else { - unsigned NumElts = WidenWidth / LdWidth; + unsigned NumElts = WidenWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 480c0680880d..ed9146dc08d9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -906,8 +906,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { return SDValue(N, 0); if (!N) { - N = NodeAllocator.Allocate(); - new (N) ConstantSDNode(isT, &Val, EltVT); + N = new (NodeAllocator) ConstantSDNode(isT, &Val, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); } @@ -950,8 +949,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ return SDValue(N, 0); if (!N) { - N = NodeAllocator.Allocate(); - new (N) ConstantFPSDNode(isTarget, &V, EltVT); + N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); } @@ -1010,8 +1008,8 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - 
SDNode *N = NodeAllocator.Allocate(); - new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags); + SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, GV, VT, + Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1026,8 +1024,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate(); - new (N) FrameIndexSDNode(FI, VT, isTarget); + SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1046,8 +1043,8 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate(); - new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags); + SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, + TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1072,8 +1069,8 @@ SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate(); - new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); + SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, + Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1099,8 +1096,8 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate(); - new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); + SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, + Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1114,8 +1111,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate(); - new (N) BasicBlockSDNode(MBB); + SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1130,8 +1126,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; if (N) return SDValue(N, 0); - N = NodeAllocator.Allocate(); - new (N) VTSDNode(VT); + N = new (NodeAllocator) VTSDNode(VT); AllNodes.push_back(N); return SDValue(N, 0); } @@ -1139,8 +1134,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { SDNode *&N = ExternalSymbols[Sym]; if (N) return SDValue(N, 0); - N = NodeAllocator.Allocate(); - new (N) ExternalSymbolSDNode(false, Sym, 0, VT); + N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT); AllNodes.push_back(N); return SDValue(N, 0); } @@ -1151,8 +1145,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, TargetExternalSymbols[std::pair(Sym, TargetFlags)]; if (N) return SDValue(N, 0); - N = NodeAllocator.Allocate(); - new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); + N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); AllNodes.push_back(N); return SDValue(N, 0); } @@ -1162,8 +1155,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { CondCodeNodes.resize(Cond+1); 
if (CondCodeNodes[Cond] == 0) { - CondCodeSDNode *N = NodeAllocator.Allocate(); - new (N) CondCodeSDNode(Cond); + CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond); CondCodeNodes[Cond] = N; AllNodes.push_back(N); } @@ -1268,8 +1260,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, int *MaskAlloc = OperandAllocator.Allocate(NElts); memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); - ShuffleVectorSDNode *N = NodeAllocator.Allocate(); - new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); + ShuffleVectorSDNode *N = + new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1292,8 +1284,8 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - CvtRndSatSDNode *N = NodeAllocator.Allocate(); - new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); + CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5, + Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1307,8 +1299,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate(); - new (N) RegisterSDNode(RegNo, VT); + SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1323,8 +1314,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate(); - new (N) EHLabelSDNode(dl, Root, Label); + SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1344,8 +1334,7 @@ SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate(); - new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags); + SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1363,8 +1352,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate(); - new (N) SrcValueSDNode(V); + SDNode *N = new (NodeAllocator) SrcValueSDNode(V); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -2313,8 +2301,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate(); - new (N) SDNode(Opcode, DL, getVTList(VT)); + SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT)); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -2542,12 +2529,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = NodeAllocator.Allocate(); - new (N) UnarySDNode(Opcode, DL, VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); CSEMap.InsertNode(N, IP); } else { - N = NodeAllocator.Allocate(); - new (N) UnarySDNode(Opcode, DL, VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); } AllNodes.push_back(N); @@ -2975,12 +2960,10 @@ SDValue 
SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = NodeAllocator.Allocate(); - new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); CSEMap.InsertNode(N, IP); } else { - N = NodeAllocator.Allocate(); - new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); } AllNodes.push_back(N); @@ -3053,12 +3036,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = NodeAllocator.Allocate(); - new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); CSEMap.InsertNode(N, IP); } else { - N = NodeAllocator.Allocate(); - new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); } AllNodes.push_back(N); @@ -3659,8 +3640,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, cast(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode* N = NodeAllocator.Allocate(); - new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO); + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, + Ptr, Cmp, Swp, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3722,8 +3703,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, cast(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode* N = NodeAllocator.Allocate(); - new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO); + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, + Ptr, Val, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3801,12 +3782,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, return SDValue(E, 0); } - N = NodeAllocator.Allocate(); - new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, + MemVT, MMO); CSEMap.InsertNode(N, IP); } else { - N = NodeAllocator.Allocate(); - new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, + MemVT, MMO); } AllNodes.push_back(N); return SDValue(N, 0); @@ -3879,8 +3860,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, cast(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = NodeAllocator.Allocate(); - new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO); + SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType, + MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3961,8 +3942,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, cast(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = NodeAllocator.Allocate(); - new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, + false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -4025,8 +4006,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, cast(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = NodeAllocator.Allocate(); - new (N) StoreSDNode(Ops, dl, VTs, 
ISD::UNINDEXED, true, SVT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, + true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -4048,10 +4029,10 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate(); - new (N) StoreSDNode(Ops, dl, VTs, AM, - ST->isTruncatingStore(), ST->getMemoryVT(), - ST->getMemOperand()); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM, + ST->isTruncatingStore(), + ST->getMemoryVT(), + ST->getMemOperand()); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -4122,12 +4103,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = NodeAllocator.Allocate(); - new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps); CSEMap.InsertNode(N, IP); } else { - N = NodeAllocator.Allocate(); - new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps); } AllNodes.push_back(N); @@ -4190,32 +4169,26 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return SDValue(E, 0); if (NumOps == 1) { - N = NodeAllocator.Allocate(); - new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); } else if (NumOps == 2) { - N = NodeAllocator.Allocate(); - new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); } else if (NumOps == 3) { - N = NodeAllocator.Allocate(); - new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], + Ops[2]); } else { - N = NodeAllocator.Allocate(); - new (N) SDNode(Opcode, DL, VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); } CSEMap.InsertNode(N, IP); } else { if (NumOps == 1) { - N = NodeAllocator.Allocate(); - new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); } else if (NumOps == 2) { - N = NodeAllocator.Allocate(); - new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); } else if (NumOps == 3) { - N = NodeAllocator.Allocate(); - new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], + Ops[2]); } else { - N = NodeAllocator.Allocate(); - new (N) SDNode(Opcode, DL, VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); } } AllNodes.push_back(N); @@ -4640,7 +4613,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, // remainder of the current SelectionDAG iteration, so we can allocate // the operands directly out of a pool with no recycling metadata. MN->InitOperands(OperandAllocator.Allocate(NumOps), - Ops, NumOps); + Ops, NumOps); else MN->InitOperands(MN->LocalOperands, Ops, NumOps); MN->OperandsNeedDelete = false; @@ -4814,8 +4787,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, } // Allocate a new MachineSDNode. 
- N = NodeAllocator.Allocate(); - new (N) MachineSDNode(~Opcode, DL, VTs); + N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs); // Initialize the operands list. if (NumOps > array_lengthof(N->LocalOperands)) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3d9a4d523b61..12096b9a68bb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1675,11 +1675,10 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, } } - // Create a jump table index for this jump table, or return an existing - // one. + // Create a jump table index for this jump table. unsigned JTEncoding = TLI.getJumpTableEncoding(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) - ->getJumpTableIndex(DestBBs); + ->createJumpTableIndex(DestBBs); // Set the jump table information so that we can codegen it as a second // MachineBasicBlock @@ -2592,6 +2591,11 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { } Ty = StTy->getElementType(Field); + } else if (const UnionType *UnTy = dyn_cast(Ty)) { + unsigned Field = cast(Idx)->getZExtValue(); + + // Offset canonically 0 for unions, but type changes + Ty = UnTy->getElementType(Field); } else { Ty = cast(Ty)->getElementType(); @@ -4277,6 +4281,9 @@ isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr, --BBI) { if (&*BBI == I) break; + // Debug info intrinsics do not get in the way of tail call optimization. + if (isa(BBI)) + continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || !BBI->isSafeToSpeculativelyExecute()) return false; diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 5c621181cd93..97e858f09c82 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -1639,11 +1640,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Save a copy of the virtual register live interval. We'll manually // merge this into the "real" physical register live interval this is // coalesced with. - LiveInterval *SavedLI = 0; + OwningPtr SavedLI; if (RealDstReg) - SavedLI = li_->dupInterval(&SrcInt); + SavedLI.reset(li_->dupInterval(&SrcInt)); else if (RealSrcReg) - SavedLI = li_->dupInterval(&DstInt); + SavedLI.reset(li_->dupInterval(&DstInt)); // Check if it is necessary to propagate "isDead" property. if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) { @@ -1853,7 +1854,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Manually deleted the live interval copy. if (SavedLI) { SavedLI->clear(); - delete SavedLI; + SavedLI.reset(); } // If resulting interval has a preference that no longer fits because of subreg diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index fa3785dc0795..aa6e2b4e87e3 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -495,7 +495,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (InstrCount == MaxDuplicateCount) return false; // Remember if we saw a call. 
if (I->getDesc().isCall()) HasCall = true; - if (!I->isPHI()) + if (!I->isPHI() && !I->isDebugValue()) InstrCount += 1; } // Heuristically, don't tail-duplicate calls if it would expand code size, diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 4cf71dcabee0..dba0e1435651 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -12,11 +12,13 @@ add_llvm_library(LLVMMC MCInstPrinter.cpp MCMachOStreamer.cpp MCNullStreamer.cpp + MCObjectWriter.cpp MCSection.cpp MCSectionELF.cpp MCSectionMachO.cpp MCStreamer.cpp MCSymbol.cpp MCValue.cpp + MachObjectWriter.cpp TargetAsmBackend.cpp ) diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 7f3947114fe4..2025463a80be 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" @@ -29,7 +30,7 @@ namespace { class MCAsmStreamer : public MCStreamer { formatted_raw_ostream &OS; const MCAsmInfo &MAI; - MCInstPrinter *InstPrinter; + OwningPtr InstPrinter; MCCodeEmitter *Emitter; SmallString<128> CommentToEmit; diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 4cf8b7e522b3..beecf7e6b858 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -10,18 +10,16 @@ #define DEBUG_TYPE "assembler" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetRegistry.h" @@ -33,8 +31,6 @@ #include using namespace llvm; -class MachObjectWriter; - STATISTIC(EmittedFragments, "Number of emitted assembler fragments"); // FIXME FIXME FIXME: There are number of places in this file where we convert @@ -42,917 +38,6 @@ STATISTIC(EmittedFragments, "Number of emitted assembler fragments"); // object file, which may truncate it. We should detect that truncation where // invalid and report errors back. -static void WriteFileData(raw_ostream &OS, const MCSectionData &SD, - MachObjectWriter &MOW); - -static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW); - -/// isVirtualSection - Check if this is a section which does not actually exist -/// in the object file. -static bool isVirtualSection(const MCSection &Section) { - // FIXME: Lame. 
- const MCSectionMachO &SMO = static_cast(Section); - return (SMO.getType() == MCSectionMachO::S_ZEROFILL); -} - -static unsigned getFixupKindLog2Size(unsigned Kind) { - switch (Kind) { - default: llvm_unreachable("invalid fixup kind!"); - case X86::reloc_pcrel_1byte: - case FK_Data_1: return 0; - case FK_Data_2: return 1; - case X86::reloc_pcrel_4byte: - case X86::reloc_riprel_4byte: - case FK_Data_4: return 2; - case FK_Data_8: return 3; - } -} - -static bool isFixupKindPCRel(unsigned Kind) { - switch (Kind) { - default: - return false; - case X86::reloc_pcrel_1byte: - case X86::reloc_pcrel_4byte: - case X86::reloc_riprel_4byte: - return true; - } -} - -class MachObjectWriter { - // See . - enum { - Header_Magic32 = 0xFEEDFACE, - Header_Magic64 = 0xFEEDFACF - }; - - enum { - Header32Size = 28, - Header64Size = 32, - SegmentLoadCommand32Size = 56, - SegmentLoadCommand64Size = 72, - Section32Size = 68, - Section64Size = 80, - SymtabLoadCommandSize = 24, - DysymtabLoadCommandSize = 80, - Nlist32Size = 12, - Nlist64Size = 16, - RelocationInfoSize = 8 - }; - - enum HeaderFileType { - HFT_Object = 0x1 - }; - - enum HeaderFlags { - HF_SubsectionsViaSymbols = 0x2000 - }; - - enum LoadCommandType { - LCT_Segment = 0x1, - LCT_Symtab = 0x2, - LCT_Dysymtab = 0xb, - LCT_Segment64 = 0x19 - }; - - // See . - enum SymbolTypeType { - STT_Undefined = 0x00, - STT_Absolute = 0x02, - STT_Section = 0x0e - }; - - enum SymbolTypeFlags { - // If any of these bits are set, then the entry is a stab entry number (see - // . Otherwise the other masks apply. - STF_StabsEntryMask = 0xe0, - - STF_TypeMask = 0x0e, - STF_External = 0x01, - STF_PrivateExtern = 0x10 - }; - - /// IndirectSymbolFlags - Flags for encoding special values in the indirect - /// symbol entry. - enum IndirectSymbolFlags { - ISF_Local = 0x80000000, - ISF_Absolute = 0x40000000 - }; - - /// RelocationFlags - Special flags for addresses. - enum RelocationFlags { - RF_Scattered = 0x80000000 - }; - - enum RelocationInfoType { - RIT_Vanilla = 0, - RIT_Pair = 1, - RIT_Difference = 2, - RIT_PreboundLazyPointer = 3, - RIT_LocalDifference = 4 - }; - - /// MachSymbolData - Helper struct for containing some precomputed information - /// on symbols. - struct MachSymbolData { - MCSymbolData *SymbolData; - uint64_t StringIndex; - uint8_t SectionIndex; - - // Support lexicographic sorting. 
- bool operator<(const MachSymbolData &RHS) const { - const std::string &Name = SymbolData->getSymbol().getName(); - return Name < RHS.SymbolData->getSymbol().getName(); - } - }; - - raw_ostream &OS; - unsigned Is64Bit : 1; - unsigned IsLSB : 1; - -public: - MachObjectWriter(raw_ostream &_OS, bool _Is64Bit, bool _IsLSB = true) - : OS(_OS), Is64Bit(_Is64Bit), IsLSB(_IsLSB) { - } - - /// @name Helper Methods - /// @{ - - void Write8(uint8_t Value) { - OS << char(Value); - } - - void Write16(uint16_t Value) { - if (IsLSB) { - Write8(uint8_t(Value >> 0)); - Write8(uint8_t(Value >> 8)); - } else { - Write8(uint8_t(Value >> 8)); - Write8(uint8_t(Value >> 0)); - } - } - - void Write32(uint32_t Value) { - if (IsLSB) { - Write16(uint16_t(Value >> 0)); - Write16(uint16_t(Value >> 16)); - } else { - Write16(uint16_t(Value >> 16)); - Write16(uint16_t(Value >> 0)); - } - } - - void Write64(uint64_t Value) { - if (IsLSB) { - Write32(uint32_t(Value >> 0)); - Write32(uint32_t(Value >> 32)); - } else { - Write32(uint32_t(Value >> 32)); - Write32(uint32_t(Value >> 0)); - } - } - - void WriteZeros(unsigned N) { - const char Zeros[16] = { 0 }; - - for (unsigned i = 0, e = N / 16; i != e; ++i) - OS << StringRef(Zeros, 16); - - OS << StringRef(Zeros, N % 16); - } - - void WriteString(StringRef Str, unsigned ZeroFillSize = 0) { - OS << Str; - if (ZeroFillSize) - WriteZeros(ZeroFillSize - Str.size()); - } - - /// @} - - void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, - bool SubsectionsViaSymbols) { - uint32_t Flags = 0; - - if (SubsectionsViaSymbols) - Flags |= HF_SubsectionsViaSymbols; - - // struct mach_header (28 bytes) or - // struct mach_header_64 (32 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - Write32(Is64Bit ? Header_Magic64 : Header_Magic32); - - // FIXME: Support cputype. - Write32(Is64Bit ? MachO::CPUTypeX86_64 : MachO::CPUTypeI386); - // FIXME: Support cpusubtype. - Write32(MachO::CPUSubType_I386_ALL); - Write32(HFT_Object); - Write32(NumLoadCommands); // Object files have a single load command, the - // segment. - Write32(LoadCommandsSize); - Write32(Flags); - if (Is64Bit) - Write32(0); // reserved - - assert(OS.tell() - Start == Is64Bit ? Header64Size : Header32Size); - } - - /// WriteSegmentLoadCommand - Write a segment load command. - /// - /// \arg NumSections - The number of sections in this segment. - /// \arg SectionDataSize - The total size of the sections. - void WriteSegmentLoadCommand(unsigned NumSections, - uint64_t VMSize, - uint64_t SectionDataStartOffset, - uint64_t SectionDataSize) { - // struct segment_command (56 bytes) or - // struct segment_command_64 (72 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - unsigned SegmentLoadCommandSize = Is64Bit ? SegmentLoadCommand64Size : - SegmentLoadCommand32Size; - Write32(Is64Bit ? LCT_Segment64 : LCT_Segment); - Write32(SegmentLoadCommandSize + - NumSections * (Is64Bit ? 
Section64Size : Section32Size)); - - WriteString("", 16); - if (Is64Bit) { - Write64(0); // vmaddr - Write64(VMSize); // vmsize - Write64(SectionDataStartOffset); // file offset - Write64(SectionDataSize); // file size - } else { - Write32(0); // vmaddr - Write32(VMSize); // vmsize - Write32(SectionDataStartOffset); // file offset - Write32(SectionDataSize); // file size - } - Write32(0x7); // maxprot - Write32(0x7); // initprot - Write32(NumSections); - Write32(0); // flags - - assert(OS.tell() - Start == SegmentLoadCommandSize); - } - - void WriteSection(const MCSectionData &SD, uint64_t FileOffset, - uint64_t RelocationsStart, unsigned NumRelocations) { - // The offset is unused for virtual sections. - if (isVirtualSection(SD.getSection())) { - assert(SD.getFileSize() == 0 && "Invalid file size!"); - FileOffset = 0; - } - - // struct section (68 bytes) or - // struct section_64 (80 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - // FIXME: cast<> support! - const MCSectionMachO &Section = - static_cast(SD.getSection()); - WriteString(Section.getSectionName(), 16); - WriteString(Section.getSegmentName(), 16); - if (Is64Bit) { - Write64(SD.getAddress()); // address - Write64(SD.getSize()); // size - } else { - Write32(SD.getAddress()); // address - Write32(SD.getSize()); // size - } - Write32(FileOffset); - - unsigned Flags = Section.getTypeAndAttributes(); - if (SD.hasInstructions()) - Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; - - assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); - Write32(Log2_32(SD.getAlignment())); - Write32(NumRelocations ? RelocationsStart : 0); - Write32(NumRelocations); - Write32(Flags); - Write32(0); // reserved1 - Write32(Section.getStubSize()); // reserved2 - if (Is64Bit) - Write32(0); // reserved3 - - assert(OS.tell() - Start == Is64Bit ? 
Section64Size : Section32Size); - } - - void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, - uint32_t StringTableOffset, - uint32_t StringTableSize) { - // struct symtab_command (24 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - Write32(LCT_Symtab); - Write32(SymtabLoadCommandSize); - Write32(SymbolOffset); - Write32(NumSymbols); - Write32(StringTableOffset); - Write32(StringTableSize); - - assert(OS.tell() - Start == SymtabLoadCommandSize); - } - - void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, - uint32_t NumLocalSymbols, - uint32_t FirstExternalSymbol, - uint32_t NumExternalSymbols, - uint32_t FirstUndefinedSymbol, - uint32_t NumUndefinedSymbols, - uint32_t IndirectSymbolOffset, - uint32_t NumIndirectSymbols) { - // struct dysymtab_command (80 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - Write32(LCT_Dysymtab); - Write32(DysymtabLoadCommandSize); - Write32(FirstLocalSymbol); - Write32(NumLocalSymbols); - Write32(FirstExternalSymbol); - Write32(NumExternalSymbols); - Write32(FirstUndefinedSymbol); - Write32(NumUndefinedSymbols); - Write32(0); // tocoff - Write32(0); // ntoc - Write32(0); // modtaboff - Write32(0); // nmodtab - Write32(0); // extrefsymoff - Write32(0); // nextrefsyms - Write32(IndirectSymbolOffset); - Write32(NumIndirectSymbols); - Write32(0); // extreloff - Write32(0); // nextrel - Write32(0); // locreloff - Write32(0); // nlocrel - - assert(OS.tell() - Start == DysymtabLoadCommandSize); - } - - void WriteNlist(MachSymbolData &MSD) { - MCSymbolData &Data = *MSD.SymbolData; - const MCSymbol &Symbol = Data.getSymbol(); - uint8_t Type = 0; - uint16_t Flags = Data.getFlags(); - uint32_t Address = 0; - - // Set the N_TYPE bits. See . - // - // FIXME: Are the prebound or indirect fields possible here? - if (Symbol.isUndefined()) - Type = STT_Undefined; - else if (Symbol.isAbsolute()) - Type = STT_Absolute; - else - Type = STT_Section; - - // FIXME: Set STAB bits. - - if (Data.isPrivateExtern()) - Type |= STF_PrivateExtern; - - // Set external bit. - if (Data.isExternal() || Symbol.isUndefined()) - Type |= STF_External; - - // Compute the symbol address. - if (Symbol.isDefined()) { - if (Symbol.isAbsolute()) { - llvm_unreachable("FIXME: Not yet implemented!"); - } else { - Address = Data.getAddress(); - } - } else if (Data.isCommon()) { - // Common symbols are encoded with the size in the address - // field, and their alignment in the flags. - Address = Data.getCommonSize(); - - // Common alignment is packed into the 'desc' bits. - if (unsigned Align = Data.getCommonAlignment()) { - unsigned Log2Size = Log2_32(Align); - assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); - if (Log2Size > 15) - llvm_report_error("invalid 'common' alignment '" + - Twine(Align) + "'"); - // FIXME: Keep this mask with the SymbolFlags enumeration. - Flags = (Flags & 0xF0FF) | (Log2Size << 8); - } - } - - // struct nlist (12 bytes) - - Write32(MSD.StringIndex); - Write8(Type); - Write8(MSD.SectionIndex); - - // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' - // value. 
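WriteNlist squeezes a common symbol's alignment into four bits of the 16-bit n_desc field with Flags = (Flags & 0xF0FF) | (Log2Size << 8), which is why alignments larger than 2^15 are reported as errors. A stand-alone sketch of the same packing and its inverse; the function names are made up:

#include <cassert>

// Pack a power-of-two alignment into bits 8..11 of the 16-bit desc word,
// mirroring: Flags = (Flags & 0xF0FF) | (Log2Size << 8).
unsigned packCommonAlignment(unsigned Flags, unsigned Align) {
  unsigned Log2Size = 0;
  while ((1u << Log2Size) < Align)
    ++Log2Size;
  assert((1u << Log2Size) == Align && "alignment must be a power of two");
  assert(Log2Size <= 15 && "alignment too large to encode");
  return (Flags & 0xF0FF) | (Log2Size << 8);
}

// Recover the alignment from the packed desc word.
unsigned unpackCommonAlignment(unsigned Flags) {
  return 1u << ((Flags >> 8) & 0xF);
}

int main() {
  unsigned Desc = packCommonAlignment(0x0001, 16);   // 2^4-byte alignment
  return unpackCommonAlignment(Desc) == 16 ? 0 : 1;
}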
- Write16(Flags); - if (Is64Bit) - Write64(Address); - else - Write32(Address); - } - - struct MachRelocationEntry { - uint32_t Word0; - uint32_t Word1; - }; - void ComputeScatteredRelocationInfo(MCAssembler &Asm, MCFragment &Fragment, - MCAsmFixup &Fixup, - const MCValue &Target, - std::vector &Relocs) { - uint32_t Address = Fragment.getOffset() + Fixup.Offset; - unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); - unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); - unsigned Type = RIT_Vanilla; - - // See . - const MCSymbol *A = Target.getSymA(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); - - if (!A_SD->getFragment()) - llvm_report_error("symbol '" + A->getName() + - "' can not be undefined in a subtraction expression"); - - uint32_t Value = A_SD->getAddress(); - uint32_t Value2 = 0; - - if (const MCSymbol *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(*B); - - if (!B_SD->getFragment()) - llvm_report_error("symbol '" + B->getName() + - "' can not be undefined in a subtraction expression"); - - // Select the appropriate difference relocation type. - // - // Note that there is no longer any semantic difference between these two - // relocation types from the linkers point of view, this is done solely - // for pedantic compatibility with 'as'. - Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference; - Value2 = B_SD->getAddress(); - } - - MachRelocationEntry MRE; - MRE.Word0 = ((Address << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - RF_Scattered); - MRE.Word1 = Value; - Relocs.push_back(MRE); - - if (Type == RIT_Difference || Type == RIT_LocalDifference) { - MachRelocationEntry MRE; - MRE.Word0 = ((0 << 0) | - (RIT_Pair << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - RF_Scattered); - MRE.Word1 = Value2; - Relocs.push_back(MRE); - } - } - - void ComputeRelocationInfo(MCAssembler &Asm, MCDataFragment &Fragment, - MCAsmFixup &Fixup, - std::vector &Relocs) { - unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); - unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); - - // FIXME: Share layout object. - MCAsmLayout Layout(Asm); - - // Evaluate the fixup; if the value was resolved, no relocation is needed. - MCValue Target; - if (Asm.EvaluateFixup(Layout, Fixup, &Fragment, Target, Fixup.FixedValue)) - return; - - // If this is a difference or a defined symbol plus an offset, then we need - // a scattered relocation entry. - uint32_t Offset = Target.getConstant(); - if (IsPCRel) - Offset += 1 << Log2Size; - if (Target.getSymB() || - (Target.getSymA() && !Target.getSymA()->isUndefined() && - Offset)) - return ComputeScatteredRelocationInfo(Asm, Fragment, Fixup, Target, - Relocs); - - // See . - uint32_t Address = Fragment.getOffset() + Fixup.Offset; - uint32_t Value = 0; - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - - if (Target.isAbsolute()) { // constant - // SymbolNum of 0 indicates the absolute section. - // - // FIXME: Currently, these are never generated (see code below). I cannot - // find a case where they are actually emitted. - Type = RIT_Vanilla; - Value = 0; - } else { - const MCSymbol *Symbol = Target.getSymA(); - MCSymbolData *SD = &Asm.getSymbolData(*Symbol); - - if (Symbol->isUndefined()) { - IsExtern = 1; - Index = SD->getIndex(); - Value = 0; - } else { - // The index is the section ordinal. 
- // - // FIXME: O(N) - Index = 1; - MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); - for (; it != ie; ++it, ++Index) - if (&*it == SD->getFragment()->getParent()) - break; - assert(it != ie && "Unable to find section index!"); - Value = SD->getAddress(); - } - - Type = RIT_Vanilla; - } - - // struct relocation_info (8 bytes) - MachRelocationEntry MRE; - MRE.Word0 = Address; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocs.push_back(MRE); - } - - void BindIndirectSymbols(MCAssembler &Asm) { - // This is the point where 'as' creates actual symbols for indirect symbols - // (in the following two passes). It would be easier for us to do this - // sooner when we see the attribute, but that makes getting the order in the - // symbol table much more complicated than it is worth. - // - // FIXME: Revisit this when the dust settles. - - // Bind non lazy symbol pointers first. - for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), - ie = Asm.indirect_symbol_end(); it != ie; ++it) { - // FIXME: cast<> support! - const MCSectionMachO &Section = - static_cast(it->SectionData->getSection()); - - if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) - continue; - - Asm.getOrCreateSymbolData(*it->Symbol); - } - - // Then lazy symbol pointers and symbol stubs. - for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), - ie = Asm.indirect_symbol_end(); it != ie; ++it) { - // FIXME: cast<> support! - const MCSectionMachO &Section = - static_cast(it->SectionData->getSection()); - - if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && - Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) - continue; - - // Set the symbol type to undefined lazy, but only on construction. - // - // FIXME: Do not hardcode. - bool Created; - MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); - if (Created) - Entry.setFlags(Entry.getFlags() | 0x0001); - } - } - - /// ComputeSymbolTable - Compute the symbol table data - /// - /// \param StringTable [out] - The string table data. - /// \param StringIndexMap [out] - Map from symbol names to offsets in the - /// string table. - void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, - std::vector &LocalSymbolData, - std::vector &ExternalSymbolData, - std::vector &UndefinedSymbolData) { - // Build section lookup table. - DenseMap SectionIndexMap; - unsigned Index = 1; - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it, ++Index) - SectionIndexMap[&it->getSection()] = Index; - assert(Index <= 256 && "Too many sections!"); - - // Index 0 is always the empty string. - StringMap StringIndexMap; - StringTable += '\x00'; - - // Build the symbol arrays and the string table, but only for non-local - // symbols. - // - // The particular order that we collect the symbols and create the string - // table, then sort the symbols is chosen to match 'as'. Even though it - // doesn't matter for correctness, this is important for letting us diff .o - // files. - for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), - ie = Asm.symbol_end(); it != ie; ++it) { - const MCSymbol &Symbol = it->getSymbol(); - - // Ignore assembler temporaries. 
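The plain (non-scattered) entries built in ComputeRelocationInfo are two 32-bit words: the fixup address, then a second word holding the symbol or section index in the low 24 bits with the pcrel, length, extern and type fields packed above it. A self-contained encoder for that layout, included only to make the bit positions explicit; the struct and function names are illustrative:

#include <cassert>

// Second-word layout: index (24 bits) | pcrel (1) | length/log2size (2) |
// extern (1) | type (4), matching the shifts used in the hunk above.
struct RelocWords {
  unsigned Word0;
  unsigned Word1;
};

RelocWords encodeReloc(unsigned Address, unsigned Index, bool IsPCRel,
                       unsigned Log2Size, bool IsExtern, unsigned Type) {
  assert(Index < (1u << 24) && Log2Size < 4 && Type < 16);
  RelocWords R;
  R.Word0 = Address;
  R.Word1 = (Index << 0) |
            ((unsigned)IsPCRel << 24) |
            (Log2Size << 25) |
            ((unsigned)IsExtern << 27) |
            (Type << 28);
  return R;
}

int main() {
  RelocWords R = encodeReloc(0x10, 3, true, 2, false, 0);
  return ((R.Word1 >> 25) & 3) == 2 ? 0 : 1;   // recover the length field
}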
- if (it->getSymbol().isTemporary() && - (!it->getFragment() || - !Asm.getBackend().doesSectionRequireSymbols( - it->getFragment()->getParent()->getSection()))) - continue; - - if (!it->isExternal() && !Symbol.isUndefined()) - continue; - - uint64_t &Entry = StringIndexMap[Symbol.getName()]; - if (!Entry) { - Entry = StringTable.size(); - StringTable += Symbol.getName(); - StringTable += '\x00'; - } - - MachSymbolData MSD; - MSD.SymbolData = it; - MSD.StringIndex = Entry; - - if (Symbol.isUndefined()) { - MSD.SectionIndex = 0; - UndefinedSymbolData.push_back(MSD); - } else if (Symbol.isAbsolute()) { - MSD.SectionIndex = 0; - ExternalSymbolData.push_back(MSD); - } else { - MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); - assert(MSD.SectionIndex && "Invalid section index!"); - ExternalSymbolData.push_back(MSD); - } - } - - // Now add the data for local symbols. - for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), - ie = Asm.symbol_end(); it != ie; ++it) { - const MCSymbol &Symbol = it->getSymbol(); - - // Ignore assembler temporaries. - if (it->getSymbol().isTemporary() && - (!it->getFragment() || - !Asm.getBackend().doesSectionRequireSymbols( - it->getFragment()->getParent()->getSection()))) - continue; - - if (it->isExternal() || Symbol.isUndefined()) - continue; - - uint64_t &Entry = StringIndexMap[Symbol.getName()]; - if (!Entry) { - Entry = StringTable.size(); - StringTable += Symbol.getName(); - StringTable += '\x00'; - } - - MachSymbolData MSD; - MSD.SymbolData = it; - MSD.StringIndex = Entry; - - if (Symbol.isAbsolute()) { - MSD.SectionIndex = 0; - LocalSymbolData.push_back(MSD); - } else { - MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); - assert(MSD.SectionIndex && "Invalid section index!"); - LocalSymbolData.push_back(MSD); - } - } - - // External and undefined symbols are required to be in lexicographic order. - std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); - std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); - - // Set the symbol indices. - Index = 0; - for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) - LocalSymbolData[i].SymbolData->setIndex(Index++); - for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) - ExternalSymbolData[i].SymbolData->setIndex(Index++); - for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) - UndefinedSymbolData[i].SymbolData->setIndex(Index++); - - // The string table is padded to a multiple of 4. - while (StringTable.size() % 4) - StringTable += '\x00'; - } - - void WriteObject(MCAssembler &Asm) { - unsigned NumSections = Asm.size(); - - // Create symbol data for any indirect symbols. - BindIndirectSymbols(Asm); - - // Compute symbol table information. - SmallString<256> StringTable; - std::vector LocalSymbolData; - std::vector ExternalSymbolData; - std::vector UndefinedSymbolData; - unsigned NumSymbols = Asm.symbol_size(); - - // No symbol table command is written if there are no symbols. - if (NumSymbols) - ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, - UndefinedSymbolData); - - // The section data starts after the header, the segment load command (and - // section headers) and the symbol table. - unsigned NumLoadCommands = 1; - uint64_t LoadCommandsSize = Is64Bit ? - SegmentLoadCommand64Size + NumSections * Section64Size : - SegmentLoadCommand32Size + NumSections * Section32Size; - - // Add the symbol table load command sizes, if used. 
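ComputeSymbolTable interns each name exactly once: offset 0 is reserved for the empty string, a new name is appended NUL-terminated, later references reuse the recorded offset, and the finished table is padded to a multiple of four bytes. The same bookkeeping in miniature, using std::map where the real code uses StringMap; the class name is invented:

#include <map>
#include <string>

// Mach-O style string table: offset 0 is the empty string, names are
// appended once with a trailing NUL, and the table ends 4-byte padded.
struct StringTableBuilder {
  std::string Table;
  std::map<std::string, unsigned> Offsets;

  StringTableBuilder() { Table.push_back('\0'); }

  unsigned add(const std::string &Name) {
    std::map<std::string, unsigned>::iterator I = Offsets.find(Name);
    if (I != Offsets.end())
      return I->second;                    // already interned, reuse the offset
    unsigned Offset = Table.size();
    Table += Name;
    Table.push_back('\0');
    Offsets[Name] = Offset;
    return Offset;
  }

  void finish() {
    while (Table.size() % 4)
      Table.push_back('\0');
  }
};

int main() {
  StringTableBuilder B;
  unsigned A = B.add("_main");
  unsigned C = B.add("_main");             // second use, same offset
  B.finish();
  return (A == C && A == 1 && B.Table.size() % 4 == 0) ? 0 : 1;
}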
- if (NumSymbols) { - NumLoadCommands += 2; - LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize; - } - - // Compute the total size of the section data, as well as its file size and - // vm size. - uint64_t SectionDataStart = (Is64Bit ? Header64Size : Header32Size) - + LoadCommandsSize; - uint64_t SectionDataSize = 0; - uint64_t SectionDataFileSize = 0; - uint64_t VMSize = 0; - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - MCSectionData &SD = *it; - - VMSize = std::max(VMSize, SD.getAddress() + SD.getSize()); - - if (isVirtualSection(SD.getSection())) - continue; - - SectionDataSize = std::max(SectionDataSize, - SD.getAddress() + SD.getSize()); - SectionDataFileSize = std::max(SectionDataFileSize, - SD.getAddress() + SD.getFileSize()); - } - - // The section data is padded to 4 bytes. - // - // FIXME: Is this machine dependent? - unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); - SectionDataFileSize += SectionDataPadding; - - // Write the prolog, starting with the header and load command... - WriteHeader(NumLoadCommands, LoadCommandsSize, - Asm.getSubsectionsViaSymbols()); - WriteSegmentLoadCommand(NumSections, VMSize, - SectionDataStart, SectionDataSize); - - // ... and then the section headers. - // - // We also compute the section relocations while we do this. Note that - // computing relocation info will also update the fixup to have the correct - // value; this will overwrite the appropriate data in the fragment when it - // is written. - std::vector RelocInfos; - uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - MCSectionData &SD = *it; - - // The assembler writes relocations in the reverse order they were seen. - // - // FIXME: It is probably more complicated than this. - unsigned NumRelocsStart = RelocInfos.size(); - for (MCSectionData::reverse_iterator it2 = SD.rbegin(), - ie2 = SD.rend(); it2 != ie2; ++it2) - if (MCDataFragment *DF = dyn_cast(&*it2)) - for (unsigned i = 0, e = DF->fixup_size(); i != e; ++i) - ComputeRelocationInfo(Asm, *DF, DF->getFixups()[e - i - 1], - RelocInfos); - - unsigned NumRelocs = RelocInfos.size() - NumRelocsStart; - uint64_t SectionStart = SectionDataStart + SD.getAddress(); - WriteSection(SD, SectionStart, RelocTableEnd, NumRelocs); - RelocTableEnd += NumRelocs * RelocationInfoSize; - } - - // Write the symbol table load command, if used. - if (NumSymbols) { - unsigned FirstLocalSymbol = 0; - unsigned NumLocalSymbols = LocalSymbolData.size(); - unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; - unsigned NumExternalSymbols = ExternalSymbolData.size(); - unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; - unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); - unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); - unsigned NumSymTabSymbols = - NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; - uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; - uint64_t IndirectSymbolOffset = 0; - - // If used, the indirect symbols are written after the section data. - if (NumIndirectSymbols) - IndirectSymbolOffset = RelocTableEnd; - - // The symbol table is written after the indirect symbol data. - uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize; - - // The string table is written after symbol table. - uint64_t StringTableOffset = - SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? 
Nlist64Size : - Nlist32Size); - WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, - StringTableOffset, StringTable.size()); - - WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, - FirstExternalSymbol, NumExternalSymbols, - FirstUndefinedSymbol, NumUndefinedSymbols, - IndirectSymbolOffset, NumIndirectSymbols); - } - - // Write the actual section data. - for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) - WriteFileData(OS, *it, *this); - - // Write the extra padding. - WriteZeros(SectionDataPadding); - - // Write the relocation entries. - for (unsigned i = 0, e = RelocInfos.size(); i != e; ++i) { - Write32(RelocInfos[i].Word0); - Write32(RelocInfos[i].Word1); - } - - // Write the symbol table data, if used. - if (NumSymbols) { - // Write the indirect symbol entries. - for (MCAssembler::indirect_symbol_iterator - it = Asm.indirect_symbol_begin(), - ie = Asm.indirect_symbol_end(); it != ie; ++it) { - // Indirect symbols in the non lazy symbol pointer section have some - // special handling. - const MCSectionMachO &Section = - static_cast(it->SectionData->getSection()); - if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) { - // If this symbol is defined and internal, mark it as such. - if (it->Symbol->isDefined() && - !Asm.getSymbolData(*it->Symbol).isExternal()) { - uint32_t Flags = ISF_Local; - if (it->Symbol->isAbsolute()) - Flags |= ISF_Absolute; - Write32(Flags); - continue; - } - } - - Write32(Asm.getSymbolData(*it->Symbol).getIndex()); - } - - // FIXME: Check that offsets match computed ones. - - // Write the symbol table entries. - for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) - WriteNlist(LocalSymbolData[i]); - for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) - WriteNlist(ExternalSymbolData[i]); - for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) - WriteNlist(UndefinedSymbolData[i]); - - // Write the string table. - OS << StringTable.str(); - } - } - - void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF) { - unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind); - - // FIXME: Endianness assumption. - assert(Fixup.Offset + Size <= DF.getContents().size() && - "Invalid fixup offset!"); - for (unsigned i = 0; i != Size; ++i) - DF.getContents()[Fixup.Offset + i] = uint8_t(Fixup.FixedValue >> (i * 8)); - } -}; - /* *** */ MCFragment::MCFragment() : Kind(FragmentType(~0)) { @@ -1008,14 +93,149 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment, /* *** */ MCAssembler::MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend, - raw_ostream &_OS) - : Context(_Context), Backend(_Backend), OS(_OS), SubsectionsViaSymbols(false) + MCCodeEmitter &_Emitter, raw_ostream &_OS) + : Context(_Context), Backend(_Backend), Emitter(_Emitter), + OS(_OS), SubsectionsViaSymbols(false) { } MCAssembler::~MCAssembler() { } +static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm, + const MCAsmFixup &Fixup, + const MCDataFragment *DF, + const MCValue Target, + const MCSection *BaseSection) { + // The effective fixup address is + // addr(atom(A)) + offset(A) + // - addr(atom(B)) - offset(B) + // - addr() + + // and the offsets are not relocatable, so the fixup is fully resolved when + // addr(atom(A)) - addr(atom(B)) - addr()) == 0. + // + // The simple (Darwin, except on x86_64) way of dealing with this was to + // assume that any reference to a temporary symbol *must* be a temporary + // symbol in the same atom, unless the sections differ. 
Therefore, any PCrel + // relocation to a temporary symbol (in the same section) is fully + // resolved. This also works in conjunction with absolutized .set, which + // requires the compiler to use .set to absolutize the differences between + // symbols which the compiler knows to be assembly time constants, so we don't + // need to worry about consider symbol differences fully resolved. + + // Non-relative fixups are only resolved if constant. + if (!BaseSection) + return Target.isAbsolute(); + + // Otherwise, relative fixups are only resolved if not a difference and the + // target is a temporary in the same section. + if (Target.isAbsolute() || Target.getSymB()) + return false; + + const MCSymbol *A = &Target.getSymA()->getSymbol(); + if (!A->isTemporary() || !A->isInSection() || + &A->getSection() != BaseSection) + return false; + + return true; +} + +static bool isScatteredFixupFullyResolved(const MCAssembler &Asm, + const MCAsmFixup &Fixup, + const MCDataFragment *DF, + const MCValue Target, + const MCSymbolData *BaseSymbol) { + // The effective fixup address is + // addr(atom(A)) + offset(A) + // - addr(atom(B)) - offset(B) + // - addr(BaseSymbol) + + // and the offsets are not relocatable, so the fixup is fully resolved when + // addr(atom(A)) - addr(atom(B)) - addr(BaseSymbol) == 0. + // + // Note that "false" is almost always conservatively correct (it means we emit + // a relocation which is unnecessary), except when it would force us to emit a + // relocation which the target cannot encode. + + const MCSymbolData *A_Base = 0, *B_Base = 0; + if (const MCSymbolRefExpr *A = Target.getSymA()) { + // Modified symbol references cannot be resolved. + if (A->getKind() != MCSymbolRefExpr::VK_None) + return false; + + A_Base = Asm.getAtom(&Asm.getSymbolData(A->getSymbol())); + if (!A_Base) + return false; + } + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + // Modified symbol references cannot be resolved. + if (B->getKind() != MCSymbolRefExpr::VK_None) + return false; + + B_Base = Asm.getAtom(&Asm.getSymbolData(B->getSymbol())); + if (!B_Base) + return false; + } + + // If there is no base, A and B have to be the same atom for this fixup to be + // fully resolved. + if (!BaseSymbol) + return A_Base == B_Base; + + // Otherwise, B must be missing and A must be the base. + return !B_Base && BaseSymbol == A_Base; +} + +bool MCAssembler::isSymbolLinkerVisible(const MCSymbolData *SD) const { + // Non-temporary labels should always be visible to the linker. + if (!SD->getSymbol().isTemporary()) + return true; + + // Absolute temporary labels are never visible. + if (!SD->getFragment()) + return false; + + // Otherwise, check if the section requires symbols even for temporary labels. + return getBackend().doesSectionRequireSymbols( + SD->getFragment()->getParent()->getSection()); +} + +const MCSymbolData *MCAssembler::getAtomForAddress(const MCSectionData *Section, + uint64_t Address) const { + const MCSymbolData *Best = 0; + for (MCAssembler::const_symbol_iterator it = symbol_begin(), + ie = symbol_end(); it != ie; ++it) { + // Ignore non-linker visible symbols. + if (!isSymbolLinkerVisible(it)) + continue; + + // Ignore symbols not in the same section. + if (!it->getFragment() || it->getFragment()->getParent() != Section) + continue; + + // Otherwise, find the closest symbol preceding this address (ties are + // resolved in favor of the last defined symbol). 
+ if (it->getAddress() <= Address && + (!Best || it->getAddress() >= Best->getAddress())) + Best = it; + } + + return Best; +} + +const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const { + // Linker visible symbols define atoms. + if (isSymbolLinkerVisible(SD)) + return SD; + + // Absolute and undefined symbols have no defining atom. + if (!SD->getFragment()) + return 0; + + // Otherwise, search by address. + return getAtomForAddress(SD->getFragment()->getParent(), SD->getAddress()); +} + bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, MCAsmFixup &Fixup, MCDataFragment *DF, MCValue &Target, uint64_t &Value) const { @@ -1028,34 +248,47 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, MCAsmFixup &Fixup, Value = Target.getConstant(); - // FIXME: This "resolved" check isn't quite right. The assumption is that if - // we have a PCrel access to a temporary, then that temporary is in the same - // atom, and so the value is resolved. We need explicit atom's to implement - // this more precisely. - bool IsResolved = true, IsPCRel = isFixupKindPCRel(Fixup.Kind); - if (const MCSymbol *Symbol = Target.getSymA()) { - if (Symbol->isDefined()) - Value += getSymbolData(*Symbol).getAddress(); + bool IsPCRel = + Emitter.getFixupKindInfo(Fixup.Kind).Flags & MCFixupKindInfo::FKF_IsPCRel; + bool IsResolved = true; + if (const MCSymbolRefExpr *A = Target.getSymA()) { + if (A->getSymbol().isDefined()) + Value += getSymbolData(A->getSymbol()).getAddress(); else IsResolved = false; - - // With scattered symbols, we assume anything that isn't a PCrel temporary - // access can have an arbitrary value. - if (getBackend().hasScatteredSymbols() && - (!IsPCRel || !Symbol->isTemporary())) - IsResolved = false; } - if (const MCSymbol *Symbol = Target.getSymB()) { - if (Symbol->isDefined()) - Value -= getSymbolData(*Symbol).getAddress(); + if (const MCSymbolRefExpr *B = Target.getSymB()) { + if (B->getSymbol().isDefined()) + Value -= getSymbolData(B->getSymbol()).getAddress(); else IsResolved = false; + } - // With scattered symbols, we assume anything that isn't a PCrel temporary - // access can have an arbitrary value. - if (getBackend().hasScatteredSymbols() && - (!IsPCRel || !Symbol->isTemporary())) - IsResolved = false; + // If we are using scattered symbols, determine whether this value is actually + // resolved; scattering may cause atoms to move. + if (IsResolved && getBackend().hasScatteredSymbols()) { + if (getBackend().hasReliableSymbolDifference()) { + // If this is a PCrel relocation, find the base atom (identified by its + // symbol) that the fixup value is relative to. + const MCSymbolData *BaseSymbol = 0; + if (IsPCRel) { + BaseSymbol = getAtomForAddress( + DF->getParent(), DF->getAddress() + Fixup.Offset); + if (!BaseSymbol) + IsResolved = false; + } + + if (IsResolved) + IsResolved = isScatteredFixupFullyResolved(*this, Fixup, DF, Target, + BaseSymbol); + } else { + const MCSection *BaseSection = 0; + if (IsPCRel) + BaseSection = &DF->getParent()->getSection(); + + IsResolved = isScatteredFixupFullyResolvedSimple(*this, Fixup, DF, Target, + BaseSection); + } } if (IsPCRel) @@ -1127,7 +360,7 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { // Set the section sizes. 
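getAtomForAddress above is a straight linear scan: among the linker-visible symbols of the section it keeps the one with the largest address that does not exceed the query, and the >= comparison lets later symbols win ties, which is what the comment about the last defined symbol means. A compact stand-alone version of that scan; Sym is a stand-in for MCSymbolData:

#include <cstddef>
#include <vector>

struct Sym {
  unsigned long long Address;
  bool LinkerVisible;
};

// Closest preceding linker-visible symbol; ties go to the later entry.
const Sym *atomForAddress(const std::vector<Sym> &Syms,
                          unsigned long long Address) {
  const Sym *Best = 0;
  for (std::size_t i = 0, e = Syms.size(); i != e; ++i) {
    const Sym &S = Syms[i];
    if (!S.LinkerVisible || S.Address > Address)
      continue;
    if (!Best || S.Address >= Best->Address)   // >= keeps the last on a tie
      Best = &S;
  }
  return Best;
}

int main() {
  std::vector<Sym> Syms;
  Sym A = {0, true}, B = {16, true}, C = {16, false};
  Syms.push_back(A);
  Syms.push_back(B);
  Syms.push_back(C);
  const Sym *Atom = atomForAddress(Syms, 20);
  return Atom == &Syms[1] ? 0 : 1;   // B: visible and closest preceding
}

The scan is linear in the number of symbols per query, which keeps the code simple; a by-address index would be the natural change if this ever became hot.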
SD.setSize(Address - SD.getAddress()); - if (isVirtualSection(SD.getSection())) + if (getBackend().isVirtualSection(SD.getSection())) SD.setFileSize(0); else SD.setFileSize(Address - SD.getAddress()); @@ -1138,7 +371,7 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { /// the \arg Count is more than the maximum optimal nops. /// /// FIXME this is X86 32-bit specific and should move to a better place. -static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW) { +static uint64_t WriteNopData(uint64_t Count, MCObjectWriter *OW) { static const uint8_t Nops[16][16] = { // nop {0x90}, @@ -1186,15 +419,14 @@ static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW) { return 0; for (uint64_t i = 0; i < Count; i++) - MOW.Write8 (uint8_t(Nops[Count - 1][i])); + OW->Write8(uint8_t(Nops[Count - 1][i])); return Count; } -/// WriteFileData - Write the \arg F data to the output file. -static void WriteFileData(raw_ostream &OS, const MCFragment &F, - MachObjectWriter &MOW) { - uint64_t Start = OS.tell(); +/// WriteFragmentData - Write the \arg F data to the output file. +static void WriteFragmentData(const MCFragment &F, MCObjectWriter *OW) { + uint64_t Start = OW->getStream().tell(); (void) Start; ++EmittedFragments; @@ -1218,7 +450,7 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, // the Count bytes. Then if that did not fill any bytes or there are any // bytes left to fill use the the Value and ValueSize to fill the rest. if (AF.getEmitNops()) { - uint64_t NopByteCount = WriteNopData(Count, MOW); + uint64_t NopByteCount = WriteNopData(Count, OW); Count -= NopByteCount; } @@ -1226,26 +458,17 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, switch (AF.getValueSize()) { default: assert(0 && "Invalid size!"); - case 1: MOW.Write8 (uint8_t (AF.getValue())); break; - case 2: MOW.Write16(uint16_t(AF.getValue())); break; - case 4: MOW.Write32(uint32_t(AF.getValue())); break; - case 8: MOW.Write64(uint64_t(AF.getValue())); break; + case 1: OW->Write8 (uint8_t (AF.getValue())); break; + case 2: OW->Write16(uint16_t(AF.getValue())); break; + case 4: OW->Write32(uint32_t(AF.getValue())); break; + case 8: OW->Write64(uint64_t(AF.getValue())); break; } } break; } case MCFragment::FT_Data: { - MCDataFragment &DF = cast(F); - - // Apply the fixups. - // - // FIXME: Move elsewhere. 
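WriteNopData picks row Count - 1 from a table of optimal x86 nop encodings, so a short run of padding becomes one long instruction instead of a string of single-byte 0x90s. A trimmed sketch with only the first three table rows; the real table covers longer sequences and the fallback here is simplified:

#include <string>

// Row Count-1 holds one optimal Count-byte nop (first three rows only).
static const unsigned char Nops[3][3] = {
  {0x90},              // nop
  {0x66, 0x90},        // xchg %ax,%ax
  {0x0f, 0x1f, 0x00},  // nopl (%eax)
};

std::string nopPad(unsigned Count) {
  std::string Out;
  if (Count >= 1 && Count <= 3) {
    Out.append(reinterpret_cast<const char *>(Nops[Count - 1]), Count);
    return Out;
  }
  Out.append(Count, '\x90');   // fallback when the table has no entry
  return Out;
}

int main() {
  std::string Pad = nopPad(2);
  return (Pad.size() == 2 && (unsigned char)Pad[0] == 0x66) ? 0 : 1;
}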
- for (MCDataFragment::const_fixup_iterator it = DF.fixup_begin(), - ie = DF.fixup_end(); it != ie; ++it) - MOW.ApplyFixup(*it, DF); - - OS << cast(F).getContents().str(); + OW->WriteBytes(cast(F).getContents().str()); break; } @@ -1255,10 +478,10 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, switch (FF.getValueSize()) { default: assert(0 && "Invalid size!"); - case 1: MOW.Write8 (uint8_t (FF.getValue())); break; - case 2: MOW.Write16(uint16_t(FF.getValue())); break; - case 4: MOW.Write32(uint32_t(FF.getValue())); break; - case 8: MOW.Write64(uint64_t(FF.getValue())); break; + case 1: OW->Write8 (uint8_t (FF.getValue())); break; + case 2: OW->Write16(uint16_t(FF.getValue())); break; + case 4: OW->Write32(uint32_t(FF.getValue())); break; + case 8: OW->Write64(uint64_t(FF.getValue())); break; } } break; @@ -1268,7 +491,7 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, MCOrgFragment &OF = cast(F); for (uint64_t i = 0, e = OF.getFileSize(); i != e; ++i) - MOW.Write8(uint8_t(OF.getValue())); + OW->Write8(uint8_t(OF.getValue())); break; } @@ -1279,30 +502,29 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, } } - assert(OS.tell() - Start == F.getFileSize()); + assert(OW->getStream().tell() - Start == F.getFileSize()); } -/// WriteFileData - Write the \arg SD data to the output file. -static void WriteFileData(raw_ostream &OS, const MCSectionData &SD, - MachObjectWriter &MOW) { +void MCAssembler::WriteSectionData(const MCSectionData *SD, + MCObjectWriter *OW) const { // Ignore virtual sections. - if (isVirtualSection(SD.getSection())) { - assert(SD.getFileSize() == 0); + if (getBackend().isVirtualSection(SD->getSection())) { + assert(SD->getFileSize() == 0); return; } - uint64_t Start = OS.tell(); + uint64_t Start = OW->getStream().tell(); (void) Start; - for (MCSectionData::const_iterator it = SD.begin(), - ie = SD.end(); it != ie; ++it) - WriteFileData(OS, *it, MOW); + for (MCSectionData::const_iterator it = SD->begin(), + ie = SD->end(); it != ie; ++it) + WriteFragmentData(*it, OW); // Add section padding. - assert(SD.getFileSize() >= SD.getSize() && "Invalid section sizes!"); - MOW.WriteZeros(SD.getFileSize() - SD.getSize()); + assert(SD->getFileSize() >= SD->getSize() && "Invalid section sizes!"); + OW->WriteZeros(SD->getFileSize() - SD->getSize()); - assert(OS.tell() - Start == SD.getFileSize()); + assert(OW->getStream().tell() - Start == SD->getFileSize()); } void MCAssembler::Finish() { @@ -1318,13 +540,47 @@ void MCAssembler::Finish() { llvm::errs() << "assembler backend - post-layout\n--\n"; dump(); }); - // Write the object file. - // // FIXME: Factor out MCObjectWriter. - bool Is64Bit = StringRef(getBackend().getTarget().getName()) == "x86-64"; - MachObjectWriter MOW(OS, Is64Bit); - MOW.WriteObject(*this); + llvm::OwningPtr Writer(getBackend().createObjectWriter(OS)); + if (!Writer) + llvm_report_error("unable to create object writer!"); + // Allow the object writer a chance to perform post-layout binding (for + // example, to set the index fields in the symbol data). + Writer->ExecutePostLayoutBinding(*this); + + // Evaluate and apply the fixups, generating relocation entries as necessary. + // + // FIXME: Share layout object. 
+ MCAsmLayout Layout(*this); + for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { + for (MCSectionData::iterator it2 = it->begin(), + ie2 = it->end(); it2 != ie2; ++it2) { + MCDataFragment *DF = dyn_cast(it2); + if (!DF) + continue; + + for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(), + ie3 = DF->fixup_end(); it3 != ie3; ++it3) { + MCAsmFixup &Fixup = *it3; + + // Evaluate the fixup. + MCValue Target; + uint64_t FixedValue; + if (!EvaluateFixup(Layout, Fixup, DF, Target, FixedValue)) { + // The fixup was unresolved, we need a relocation. Inform the object + // writer of the relocation, and give it an opportunity to adjust the + // fixup value if need be. + Writer->RecordRelocation(*this, *DF, Fixup, Target, FixedValue); + } + + getBackend().ApplyFixup(Fixup, *DF, FixedValue); + } + } + } + + // Write the object file. + Writer->WriteObject(*this); OS.flush(); } @@ -1354,7 +610,7 @@ bool MCAssembler::LayoutOnce() { MCSectionData &SD = *it; // Skip virtual sections. - if (isVirtualSection(SD.getSection())) + if (getBackend().isVirtualSection(SD.getSection())) continue; // Align this section if necessary by adding padding bytes to the previous @@ -1377,7 +633,7 @@ bool MCAssembler::LayoutOnce() { for (iterator it = begin(), ie = end(); it != ie; ++it) { MCSectionData &SD = *it; - if (!isVirtualSection(SD.getSection())) + if (!getBackend().isVirtualSection(SD.getSection())) continue; // Align this section if necessary by adding padding bytes to the previous diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp index accb06c8af54..d51323785541 100644 --- a/lib/MC/MCCodeEmitter.cpp +++ b/lib/MC/MCCodeEmitter.cpp @@ -19,10 +19,10 @@ MCCodeEmitter::~MCCodeEmitter() { const MCFixupKindInfo &MCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const { static const MCFixupKindInfo Builtins[] = { - { "FK_Data_1", 0, 8 }, - { "FK_Data_2", 0, 16 }, - { "FK_Data_4", 0, 32 }, - { "FK_Data_8", 0, 64 } + { "FK_Data_1", 0, 8, 0 }, + { "FK_Data_2", 0, 16, 0 }, + { "FK_Data_4", 0, 32, 0 }, + { "FK_Data_8", 0, 64, 0 } }; assert(Kind <= 3 && "Unknown fixup kind"); diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 70c89a2333af..37e828252602 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -49,17 +49,6 @@ MCSymbol *MCContext::CreateTempSymbol() { "tmp" + Twine(NextUniqueID++)); } - -MCSymbol *MCContext::GetOrCreateTemporarySymbol(StringRef Name) { - // If there is no name, create a new anonymous symbol. - // FIXME: Remove this. This form of the method should always take a name. - if (Name.empty()) - return GetOrCreateTemporarySymbol(Twine(MAI.getPrivateGlobalPrefix()) + - "tmp" + Twine(NextUniqueID++)); - - return GetOrCreateSymbol(Name, true); -} - MCSymbol *MCContext::GetOrCreateTemporarySymbol(const Twine &Name) { SmallString<128> NameSV; Name.toVector(NameSV); diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index a2ed20bb28ca..275994431ee0 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -30,7 +30,7 @@ void MCExpr::print(raw_ostream &OS) const { case MCExpr::SymbolRef: { const MCSymbolRefExpr &SRE = cast(*this); const MCSymbol &Sym = SRE.getSymbol(); - + // Parenthesize names that start with $ so that they don't look like // absolute names. if (Sym.getName()[0] == '$') @@ -59,14 +59,14 @@ void MCExpr::print(raw_ostream &OS) const { case MCExpr::Binary: { const MCBinaryExpr &BE = cast(*this); - + // Only print parens around the LHS if it is non-trivial. 
if (isa(BE.getLHS()) || isa(BE.getLHS())) { OS << *BE.getLHS(); } else { OS << '(' << *BE.getLHS() << ')'; } - + switch (BE.getOpcode()) { default: assert(0 && "Invalid opcode!"); case MCBinaryExpr::Add: @@ -77,7 +77,7 @@ void MCExpr::print(raw_ostream &OS) const { return; } } - + OS << '+'; break; case MCBinaryExpr::And: OS << '&'; break; @@ -98,7 +98,7 @@ void MCExpr::print(raw_ostream &OS) const { case MCBinaryExpr::Sub: OS << '-'; break; case MCBinaryExpr::Xor: OS << '^'; break; } - + // Only print parens around the LHS if it is non-trivial. if (isa(BE.getRHS()) || isa(BE.getRHS())) { OS << *BE.getRHS(); @@ -193,7 +193,7 @@ void MCTargetExpr::Anchor() {} bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const { MCValue Value; - + if (!EvaluateAsRelocatable(Value, Layout) || !Value.isAbsolute()) return false; @@ -201,16 +201,16 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const { return true; } -static bool EvaluateSymbolicAdd(const MCValue &LHS, const MCSymbol *RHS_A, - const MCSymbol *RHS_B, int64_t RHS_Cst, +static bool EvaluateSymbolicAdd(const MCValue &LHS,const MCSymbolRefExpr *RHS_A, + const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst, MCValue &Res) { // We can't add or subtract two symbols. if ((LHS.getSymA() && RHS_A) || (LHS.getSymB() && RHS_B)) return false; - const MCSymbol *A = LHS.getSymA() ? LHS.getSymA() : RHS_A; - const MCSymbol *B = LHS.getSymB() ? LHS.getSymB() : RHS_B; + const MCSymbolRefExpr *A = LHS.getSymA() ? LHS.getSymA() : RHS_A; + const MCSymbolRefExpr *B = LHS.getSymB() ? LHS.getSymB() : RHS_B; if (B) { // If we have a negated symbol, then we must have also have a non-negated // symbol in order to encode the expression. We can do this check later to @@ -228,13 +228,14 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res, switch (getKind()) { case Target: return cast(this)->EvaluateAsRelocatableImpl(Res, Layout); - + case Constant: Res = MCValue::get(cast(this)->getValue()); return true; case SymbolRef: { - const MCSymbol &Sym = cast(this)->getSymbol(); + const MCSymbolRefExpr *SRE = cast(this); + const MCSymbol &Sym = SRE->getSymbol(); // Evaluate recursively if this is a variable. if (Sym.isVariable()) { @@ -245,9 +246,12 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res, // layout object and the target requests it. 
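The hunk that follows folds a difference of two defined symbols when the backend has absolutized .set: the result is addr(fragment(A)) + offset(A) - addr(fragment(B)) - offset(B) + constant, and the MCValue becomes a plain number with no surviving relocation. As a worked example under assumed addresses, if A sits at offset 8 in a fragment at 0x100, B at offset 0 in a fragment at 0x120, and the expression carries a constant of 4, the folded value is (0x100 + 8) - (0x120 + 0) + 4 = -20.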
if (Layout && Res.getSymB() && Layout->getAssembler().getBackend().hasAbsolutizedSet() && - Res.getSymA()->isDefined() && Res.getSymB()->isDefined()) { - MCSymbolData &A = Layout->getAssembler().getSymbolData(*Res.getSymA()); - MCSymbolData &B = Layout->getAssembler().getSymbolData(*Res.getSymB()); + Res.getSymA()->getSymbol().isDefined() && + Res.getSymB()->getSymbol().isDefined()) { + MCSymbolData &A = + Layout->getAssembler().getSymbolData(Res.getSymA()->getSymbol()); + MCSymbolData &B = + Layout->getAssembler().getSymbolData(Res.getSymB()->getSymbol()); Res = MCValue::get(+ A.getFragment()->getAddress() + A.getOffset() - B.getFragment()->getAddress() - B.getOffset() + Res.getConstant()); @@ -256,7 +260,7 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res, return true; } - Res = MCValue::get(&Sym, 0, 0); + Res = MCValue::get(SRE, 0, 0); return true; } @@ -277,13 +281,13 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res, /// -(a - b + const) ==> (b - a - const) if (Value.getSymA() && !Value.getSymB()) return false; - Res = MCValue::get(Value.getSymB(), Value.getSymA(), - -Value.getConstant()); + Res = MCValue::get(Value.getSymB(), Value.getSymA(), + -Value.getConstant()); break; case MCUnaryExpr::Not: if (!Value.isAbsolute()) return false; - Res = MCValue::get(~Value.getConstant()); + Res = MCValue::get(~Value.getConstant()); break; case MCUnaryExpr::Plus: Res = Value; @@ -296,7 +300,7 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res, case Binary: { const MCBinaryExpr *ABE = cast(this); MCValue LHSValue, RHSValue; - + if (!ABE->getLHS()->EvaluateAsRelocatable(LHSValue, Layout) || !ABE->getRHS()->EvaluateAsRelocatable(RHSValue, Layout)) return false; diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 73b1074531d1..9504392bc1bc 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -45,7 +45,6 @@ class MCMachOStreamer : public MCStreamer { private: MCAssembler Assembler; - MCCodeEmitter *Emitter; MCSectionData *CurSectionData; private: @@ -61,7 +60,7 @@ private: public: MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, raw_ostream &_OS, MCCodeEmitter *_Emitter) - : MCStreamer(Context), Assembler(Context, TAB, _OS), Emitter(_Emitter), + : MCStreamer(Context), Assembler(Context, TAB, *_Emitter, _OS), CurSectionData(0) {} ~MCMachOStreamer() {} @@ -370,15 +369,12 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { if (Inst.getOperand(i).isExpr()) AddValueSymbols(Inst.getOperand(i).getExpr()); - if (!Emitter) - llvm_unreachable("no code emitter available!"); - CurSectionData->setHasInstructions(true); SmallVector Fixups; SmallString<256> Code; raw_svector_ostream VecOS(Code); - Emitter->EncodeInstruction(Inst, VecOS, Fixups); + Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups); VecOS.flush(); // Add the fixups and data. diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp new file mode 100644 index 000000000000..d117e82b8a1f --- /dev/null +++ b/lib/MC/MCObjectWriter.cpp @@ -0,0 +1,15 @@ +//===- lib/MC/MCObjectWriter.cpp - MCObjectWriter implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectWriter.h" + +using namespace llvm; + +MCObjectWriter::~MCObjectWriter() { +} diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp index 043a49d80c6c..c6ea16ce7b4d 100644 --- a/lib/MC/MCValue.cpp +++ b/lib/MC/MCValue.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCValue.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -19,10 +20,12 @@ void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const { return; } - OS << *getSymA(); + getSymA()->print(OS); - if (getSymB()) - OS << " - " << *getSymB(); + if (getSymB()) { + OS << " - "; + getSymB()->print(OS); + } if (getConstant()) OS << " + " << getConstant(); diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp new file mode 100644 index 000000000000..4b08c226d144 --- /dev/null +++ b/lib/MC/MachObjectWriter.cpp @@ -0,0 +1,1109 @@ +//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MachObjectWriter.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" +#include "llvm/Target/TargetAsmBackend.h" + +// FIXME: Gross. +#include "../Target/X86/X86FixupKinds.h" + +#include +using namespace llvm; + +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: llvm_unreachable("invalid fixup kind!"); + case X86::reloc_pcrel_1byte: + case FK_Data_1: return 0; + case FK_Data_2: return 1; + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + +static bool isFixupKindPCRel(unsigned Kind) { + switch (Kind) { + default: + return false; + case X86::reloc_pcrel_1byte: + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + return true; + } +} + +static bool isFixupKindRIPRel(unsigned Kind) { + return Kind == X86::reloc_riprel_4byte || + Kind == X86::reloc_riprel_4byte_movq_load; +} + +namespace { + +class MachObjectWriterImpl { + // See . + enum { + Header_Magic32 = 0xFEEDFACE, + Header_Magic64 = 0xFEEDFACF + }; + + enum { + Header32Size = 28, + Header64Size = 32, + SegmentLoadCommand32Size = 56, + SegmentLoadCommand64Size = 72, + Section32Size = 68, + Section64Size = 80, + SymtabLoadCommandSize = 24, + DysymtabLoadCommandSize = 80, + Nlist32Size = 12, + Nlist64Size = 16, + RelocationInfoSize = 8 + }; + + enum HeaderFileType { + HFT_Object = 0x1 + }; + + enum HeaderFlags { + HF_SubsectionsViaSymbols = 0x2000 + }; + + enum LoadCommandType { + LCT_Segment = 0x1, + LCT_Symtab = 0x2, + LCT_Dysymtab = 0xb, + LCT_Segment64 = 0x19 + }; + + // See . + enum SymbolTypeType { + STT_Undefined = 0x00, + STT_Absolute = 0x02, + STT_Section = 0x0e + }; + + enum SymbolTypeFlags { + // If any of these bits are set, then the entry is a stab entry number (see + // . 
Otherwise the other masks apply. + STF_StabsEntryMask = 0xe0, + + STF_TypeMask = 0x0e, + STF_External = 0x01, + STF_PrivateExtern = 0x10 + }; + + /// IndirectSymbolFlags - Flags for encoding special values in the indirect + /// symbol entry. + enum IndirectSymbolFlags { + ISF_Local = 0x80000000, + ISF_Absolute = 0x40000000 + }; + + /// RelocationFlags - Special flags for addresses. + enum RelocationFlags { + RF_Scattered = 0x80000000 + }; + + enum RelocationInfoType { + RIT_Vanilla = 0, + RIT_Pair = 1, + RIT_Difference = 2, + RIT_PreboundLazyPointer = 3, + RIT_LocalDifference = 4 + }; + + /// X86_64 uses its own relocation types. + enum RelocationInfoTypeX86_64 { + RIT_X86_64_Unsigned = 0, + RIT_X86_64_Signed = 1, + RIT_X86_64_Branch = 2, + RIT_X86_64_GOTLoad = 3, + RIT_X86_64_GOT = 4, + RIT_X86_64_Subtractor = 5, + RIT_X86_64_Signed1 = 6, + RIT_X86_64_Signed2 = 7, + RIT_X86_64_Signed4 = 8 + }; + + /// MachSymbolData - Helper struct for containing some precomputed information + /// on symbols. + struct MachSymbolData { + MCSymbolData *SymbolData; + uint64_t StringIndex; + uint8_t SectionIndex; + + // Support lexicographic sorting. + bool operator<(const MachSymbolData &RHS) const { + const std::string &Name = SymbolData->getSymbol().getName(); + return Name < RHS.SymbolData->getSymbol().getName(); + } + }; + + /// @name Relocation Data + /// @{ + + struct MachRelocationEntry { + uint32_t Word0; + uint32_t Word1; + }; + + llvm::DenseMap > Relocations; + + /// @} + /// @name Symbol Table Data + /// @{ + + SmallString<256> StringTable; + std::vector LocalSymbolData; + std::vector ExternalSymbolData; + std::vector UndefinedSymbolData; + + /// @} + + MachObjectWriter *Writer; + + raw_ostream &OS; + + unsigned Is64Bit : 1; + +public: + MachObjectWriterImpl(MachObjectWriter *_Writer, bool _Is64Bit) + : Writer(_Writer), OS(Writer->getStream()), Is64Bit(_Is64Bit) { + } + + void Write8(uint8_t Value) { Writer->Write8(Value); } + void Write16(uint16_t Value) { Writer->Write16(Value); } + void Write32(uint32_t Value) { Writer->Write32(Value); } + void Write64(uint64_t Value) { Writer->Write64(Value); } + void WriteZeros(unsigned N) { Writer->WriteZeros(N); } + void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) { + Writer->WriteBytes(Str, ZeroFillSize); + } + + void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, + bool SubsectionsViaSymbols) { + uint32_t Flags = 0; + + if (SubsectionsViaSymbols) + Flags |= HF_SubsectionsViaSymbols; + + // struct mach_header (28 bytes) or + // struct mach_header_64 (32 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(Is64Bit ? Header_Magic64 : Header_Magic32); + + // FIXME: Support cputype. + Write32(Is64Bit ? MachO::CPUTypeX86_64 : MachO::CPUTypeI386); + // FIXME: Support cpusubtype. + Write32(MachO::CPUSubType_I386_ALL); + Write32(HFT_Object); + Write32(NumLoadCommands); // Object files have a single load command, the + // segment. + Write32(LoadCommandsSize); + Write32(Flags); + if (Is64Bit) + Write32(0); // reserved + + assert(OS.tell() - Start == Is64Bit ? Header64Size : Header32Size); + } + + /// WriteSegmentLoadCommand - Write a segment load command. + /// + /// \arg NumSections - The number of sections in this segment. + /// \arg SectionDataSize - The total size of the sections. 
+ void WriteSegmentLoadCommand(unsigned NumSections, + uint64_t VMSize, + uint64_t SectionDataStartOffset, + uint64_t SectionDataSize) { + // struct segment_command (56 bytes) or + // struct segment_command_64 (72 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + unsigned SegmentLoadCommandSize = Is64Bit ? SegmentLoadCommand64Size : + SegmentLoadCommand32Size; + Write32(Is64Bit ? LCT_Segment64 : LCT_Segment); + Write32(SegmentLoadCommandSize + + NumSections * (Is64Bit ? Section64Size : Section32Size)); + + WriteBytes("", 16); + if (Is64Bit) { + Write64(0); // vmaddr + Write64(VMSize); // vmsize + Write64(SectionDataStartOffset); // file offset + Write64(SectionDataSize); // file size + } else { + Write32(0); // vmaddr + Write32(VMSize); // vmsize + Write32(SectionDataStartOffset); // file offset + Write32(SectionDataSize); // file size + } + Write32(0x7); // maxprot + Write32(0x7); // initprot + Write32(NumSections); + Write32(0); // flags + + assert(OS.tell() - Start == SegmentLoadCommandSize); + } + + void WriteSection(const MCAssembler &Asm, const MCSectionData &SD, + uint64_t FileOffset, uint64_t RelocationsStart, + unsigned NumRelocations) { + // The offset is unused for virtual sections. + if (Asm.getBackend().isVirtualSection(SD.getSection())) { + assert(SD.getFileSize() == 0 && "Invalid file size!"); + FileOffset = 0; + } + + // struct section (68 bytes) or + // struct section_64 (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + // FIXME: cast<> support! + const MCSectionMachO &Section = + static_cast(SD.getSection()); + WriteBytes(Section.getSectionName(), 16); + WriteBytes(Section.getSegmentName(), 16); + if (Is64Bit) { + Write64(SD.getAddress()); // address + Write64(SD.getSize()); // size + } else { + Write32(SD.getAddress()); // address + Write32(SD.getSize()); // size + } + Write32(FileOffset); + + unsigned Flags = Section.getTypeAndAttributes(); + if (SD.hasInstructions()) + Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; + + assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); + Write32(Log2_32(SD.getAlignment())); + Write32(NumRelocations ? RelocationsStart : 0); + Write32(NumRelocations); + Write32(Flags); + Write32(0); // reserved1 + Write32(Section.getStubSize()); // reserved2 + if (Is64Bit) + Write32(0); // reserved3 + + assert(OS.tell() - Start == Is64Bit ? 
Section64Size : Section32Size); + } + + void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, + uint32_t StringTableOffset, + uint32_t StringTableSize) { + // struct symtab_command (24 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(LCT_Symtab); + Write32(SymtabLoadCommandSize); + Write32(SymbolOffset); + Write32(NumSymbols); + Write32(StringTableOffset); + Write32(StringTableSize); + + assert(OS.tell() - Start == SymtabLoadCommandSize); + } + + void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, + uint32_t NumLocalSymbols, + uint32_t FirstExternalSymbol, + uint32_t NumExternalSymbols, + uint32_t FirstUndefinedSymbol, + uint32_t NumUndefinedSymbols, + uint32_t IndirectSymbolOffset, + uint32_t NumIndirectSymbols) { + // struct dysymtab_command (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(LCT_Dysymtab); + Write32(DysymtabLoadCommandSize); + Write32(FirstLocalSymbol); + Write32(NumLocalSymbols); + Write32(FirstExternalSymbol); + Write32(NumExternalSymbols); + Write32(FirstUndefinedSymbol); + Write32(NumUndefinedSymbols); + Write32(0); // tocoff + Write32(0); // ntoc + Write32(0); // modtaboff + Write32(0); // nmodtab + Write32(0); // extrefsymoff + Write32(0); // nextrefsyms + Write32(IndirectSymbolOffset); + Write32(NumIndirectSymbols); + Write32(0); // extreloff + Write32(0); // nextrel + Write32(0); // locreloff + Write32(0); // nlocrel + + assert(OS.tell() - Start == DysymtabLoadCommandSize); + } + + void WriteNlist(MachSymbolData &MSD) { + MCSymbolData &Data = *MSD.SymbolData; + const MCSymbol &Symbol = Data.getSymbol(); + uint8_t Type = 0; + uint16_t Flags = Data.getFlags(); + uint32_t Address = 0; + + // Set the N_TYPE bits. See . + // + // FIXME: Are the prebound or indirect fields possible here? + if (Symbol.isUndefined()) + Type = STT_Undefined; + else if (Symbol.isAbsolute()) + Type = STT_Absolute; + else + Type = STT_Section; + + // FIXME: Set STAB bits. + + if (Data.isPrivateExtern()) + Type |= STF_PrivateExtern; + + // Set external bit. + if (Data.isExternal() || Symbol.isUndefined()) + Type |= STF_External; + + // Compute the symbol address. + if (Symbol.isDefined()) { + if (Symbol.isAbsolute()) { + llvm_unreachable("FIXME: Not yet implemented!"); + } else { + Address = Data.getAddress(); + } + } else if (Data.isCommon()) { + // Common symbols are encoded with the size in the address + // field, and their alignment in the flags. + Address = Data.getCommonSize(); + + // Common alignment is packed into the 'desc' bits. + if (unsigned Align = Data.getCommonAlignment()) { + unsigned Log2Size = Log2_32(Align); + assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); + if (Log2Size > 15) + llvm_report_error("invalid 'common' alignment '" + + Twine(Align) + "'"); + // FIXME: Keep this mask with the SymbolFlags enumeration. + Flags = (Flags & 0xF0FF) | (Log2Size << 8); + } + } + + // struct nlist (12 bytes) + + Write32(MSD.StringIndex); + Write8(Type); + Write8(MSD.SectionIndex); + + // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' + // value. + Write16(Flags); + if (Is64Bit) + Write64(Address); + else + Write32(Address); + } + + void RecordX86_64Relocation(const MCAssembler &Asm, + const MCDataFragment &Fragment, + const MCAsmFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); + unsigned IsRIPRel = isFixupKindRIPRel(Fixup.Kind); + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); + + // See . 
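// (Editor's note, not part of the patch: each entry this function emits is a
// plain struct relocation_info -- two 32-bit words. Word0 holds the fixup
// address within the section; Word1 packs r_symbolnum in bits 0-23, r_pcrel in
// bit 24, r_length (log2 of the size) in bits 25-26, r_extern in bit 27 and
// r_type in bits 28-31, exactly matching the shifts used below.)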
+ uint32_t Address = Fragment.getOffset() + Fixup.Offset; + int64_t Value = 0; + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + Value = Target.getConstant(); + + if (IsPCRel) { + // Compensate for the relocation offset, Darwin x86_64 relocations only + // have the addend and appear to have attempted to define it to be the + // actual expression addend without the PCrel bias. However, instructions + // with data following the relocation are not accomodated for (see comment + // below regarding SIGNED{1,2,4}), so it isn't exactly that either. + Value += 1 << Log2Size; + } + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + Type = RIT_X86_64_Unsigned; + Index = 0; + + // FIXME: I believe this is broken, I don't think the linker can + // understand it. I think it would require a local relocation, but I'm not + // sure if that would work either. The official way to get an absolute + // PCrel relocation is to use an absolute symbol (which we don't support + // yet). + if (IsPCRel) { + IsExtern = 1; + Type = RIT_X86_64_Branch; + } + } else if (Target.getSymB()) { // A - B + constant + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData &A_SD = Asm.getSymbolData(*A); + const MCSymbolData *A_Base = Asm.getAtom(&A_SD); + + const MCSymbol *B = &Target.getSymB()->getSymbol(); + MCSymbolData &B_SD = Asm.getSymbolData(*B); + const MCSymbolData *B_Base = Asm.getAtom(&B_SD); + + // Neither symbol can be modified. + if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) + llvm_report_error("unsupported relocation of modified symbol"); + + // We don't support PCrel relocations of differences. Darwin 'as' doesn't + // implement most of these correctly. + if (IsPCRel) + llvm_report_error("unsupported pc-relative relocation of difference"); + + // We don't currently support any situation where one or both of the + // symbols would require a local relocation. This is almost certainly + // unused and may not be possible to encode correctly. + if (!A_Base || !B_Base) + llvm_report_error("unsupported local relocations in difference"); + + // Darwin 'as' doesn't emit correct relocations for this (it ends up with + // a single SIGNED relocation); reject it for now. + if (A_Base == B_Base) + llvm_report_error("unsupported relocation with identical base"); + + Value += A_SD.getAddress() - A_Base->getAddress(); + Value -= B_SD.getAddress() - B_Base->getAddress(); + + Index = A_Base->getIndex(); + IsExtern = 1; + Type = RIT_X86_64_Unsigned; + + MachRelocationEntry MRE; + MRE.Word0 = Address; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment.getParent()].push_back(MRE); + + Index = B_Base->getIndex(); + IsExtern = 1; + Type = RIT_X86_64_Subtractor; + } else { + const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); + MCSymbolData &SD = Asm.getSymbolData(*Symbol); + const MCSymbolData *Base = Asm.getAtom(&SD); + + // x86_64 almost always uses external relocations, except when there is no + // symbol to use as a base address (a local symbol with no preceeding + // non-local symbol). + if (Base) { + Index = Base->getIndex(); + IsExtern = 1; + + // Add the local offset, if needed. + if (Base != &SD) + Value += SD.getAddress() - Base->getAddress(); + } else { + // The index is the section ordinal. 
+ // + // FIXME: O(N) + Index = 1; + MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); + for (; it != ie; ++it, ++Index) + if (&*it == SD.getFragment()->getParent()) + break; + assert(it != ie && "Unable to find section index!"); + IsExtern = 0; + Value += SD.getAddress(); + + if (IsPCRel) + Value -= Address + (1 << Log2Size); + } + + MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); + if (IsPCRel) { + if (IsRIPRel) { + if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // x86_64 distinguishes movq foo@GOTPCREL so that the linker can + // rewrite the movq to an leaq at link time if the symbol ends up in + // the same linkage unit. + if (unsigned(Fixup.Kind) == X86::reloc_riprel_4byte_movq_load) + Type = RIT_X86_64_GOTLoad; + else + Type = RIT_X86_64_GOT; + } else if (Modifier != MCSymbolRefExpr::VK_None) + llvm_report_error("unsupported symbol modifier in relocation"); + else + Type = RIT_X86_64_Signed; + } else { + if (Modifier != MCSymbolRefExpr::VK_None) + llvm_report_error("unsupported symbol modifier in branch " + "relocation"); + + Type = RIT_X86_64_Branch; + } + + // The Darwin x86_64 relocation format has a problem where it cannot + // encode an address (L + ) which is outside the atom + // containing L. Generally, this shouldn't occur but it does happen + // when we have a RIPrel instruction with data following the relocation + // entry (e.g., movb $012, L0(%rip)). Even with the PCrel adjustment + // Darwin x86_64 uses, the offset is still negative and the linker has + // no way to recognize this. + // + // To work around this, Darwin uses several special relocation types to + // indicate the offsets. However, the specification or implementation of + // these seems to also be incomplete; they should adjust the addend as + // well based on the actual encoded instruction (the additional bias), + // but instead appear to just look at the final offset. + if (IsRIPRel) { + switch (-(Target.getConstant() + (1 << Log2Size))) { + case 1: Type = RIT_X86_64_Signed1; break; + case 2: Type = RIT_X86_64_Signed2; break; + case 4: Type = RIT_X86_64_Signed4; break; + } + } + } else { + if (Modifier == MCSymbolRefExpr::VK_GOT) + Type = RIT_X86_64_GOT; + else if (Modifier != MCSymbolRefExpr::VK_None) + llvm_report_error("unsupported symbol modifier in relocation"); + else + Type = RIT_X86_64_Unsigned; + } + } + + // x86_64 always writes custom values into the fixups. + FixedValue = Value; + + // struct relocation_info (8 bytes) + MachRelocationEntry MRE; + MRE.Word0 = Address; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment.getParent()].push_back(MRE); + } + + void RecordScatteredRelocation(const MCAssembler &Asm, + const MCFragment &Fragment, + const MCAsmFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + uint32_t Address = Fragment.getOffset() + Fixup.Offset; + unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); + unsigned Type = RIT_Vanilla; + + // See . 
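// (Editor's note, not part of the patch: scattered entries are laid out
// differently from plain relocation_info -- Word0 packs the address in bits
// 0-23, the type in bits 24-27, log2 of the size in bits 28-29, the
// pc-relative bit in bit 30 and the RF_Scattered marker in bit 31, while
// Word1 holds the target symbol's address rather than a symbol/section index.)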
+ const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + llvm_report_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = A_SD->getAddress(); + uint32_t Value2 = 0; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + llvm_report_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + // + // Note that there is no longer any semantic difference between these two + // relocation types from the linkers point of view, this is done solely + // for pedantic compatibility with 'as'. + Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference; + Value2 = B_SD->getAddress(); + } + + // Relocations are written out in reverse order, so the PAIR comes first. + if (Type == RIT_Difference || Type == RIT_LocalDifference) { + MachRelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (RIT_Pair << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + RF_Scattered); + MRE.Word1 = Value2; + Relocations[Fragment.getParent()].push_back(MRE); + } + + MachRelocationEntry MRE; + MRE.Word0 = ((Address << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + RF_Scattered); + MRE.Word1 = Value; + Relocations[Fragment.getParent()].push_back(MRE); + } + + void RecordRelocation(const MCAssembler &Asm, const MCDataFragment &Fragment, + const MCAsmFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + if (Is64Bit) { + RecordX86_64Relocation(Asm, Fragment, Fixup, Target, FixedValue); + return; + } + + unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); + + // If this is a difference or a defined symbol plus an offset, then we need + // a scattered relocation entry. + uint32_t Offset = Target.getConstant(); + if (IsPCRel) + Offset += 1 << Log2Size; + if (Target.getSymB() || + (Target.getSymA() && !Target.getSymA()->getSymbol().isUndefined() && + Offset)) { + RecordScatteredRelocation(Asm, Fragment, Fixup, Target, FixedValue); + return; + } + + // See . + uint32_t Address = Fragment.getOffset() + Fixup.Offset; + uint32_t Value = 0; + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + // + // FIXME: Currently, these are never generated (see code below). I cannot + // find a case where they are actually emitted. + Type = RIT_Vanilla; + Value = 0; + } else { + const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); + MCSymbolData *SD = &Asm.getSymbolData(*Symbol); + + if (Symbol->isUndefined()) { + IsExtern = 1; + Index = SD->getIndex(); + Value = 0; + } else { + // The index is the section ordinal. 
+ // + // FIXME: O(N) + Index = 1; + MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); + for (; it != ie; ++it, ++Index) + if (&*it == SD->getFragment()->getParent()) + break; + assert(it != ie && "Unable to find section index!"); + Value = SD->getAddress(); + } + + Type = RIT_Vanilla; + } + + // struct relocation_info (8 bytes) + MachRelocationEntry MRE; + MRE.Word0 = Address; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment.getParent()].push_back(MRE); + } + + void BindIndirectSymbols(MCAssembler &Asm) { + // This is the point where 'as' creates actual symbols for indirect symbols + // (in the following two passes). It would be easier for us to do this + // sooner when we see the attribute, but that makes getting the order in the + // symbol table much more complicated than it is worth. + // + // FIXME: Revisit this when the dust settles. + + // Bind non lazy symbol pointers first. + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + // FIXME: cast<> support! + const MCSectionMachO &Section = + static_cast<const MCSectionMachO&>(it->SectionData->getSection()); + + if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) + continue; + + Asm.getOrCreateSymbolData(*it->Symbol); + } + + // Then lazy symbol pointers and symbol stubs. + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + // FIXME: cast<> support! + const MCSectionMachO &Section = + static_cast<const MCSectionMachO&>(it->SectionData->getSection()); + + if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && + Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) + continue; + + // Set the symbol type to undefined lazy, but only on construction. + // + // FIXME: Do not hardcode. + bool Created; + MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); + if (Created) + Entry.setFlags(Entry.getFlags() | 0x0001); + } + } + + /// ComputeSymbolTable - Compute the symbol table data + /// + /// \param StringTable [out] - The string table data. + /// \param StringIndexMap [out] - Map from symbol names to offsets in the + /// string table. + void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, + std::vector<MachSymbolData> &LocalSymbolData, + std::vector<MachSymbolData> &ExternalSymbolData, + std::vector<MachSymbolData> &UndefinedSymbolData) { + // Build section lookup table. + DenseMap<const MCSection*, uint8_t> SectionIndexMap; + unsigned Index = 1; + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it, ++Index) + SectionIndexMap[&it->getSection()] = Index; + assert(Index <= 256 && "Too many sections!"); + + // Index 0 is always the empty string. + StringMap<uint64_t> StringIndexMap; + StringTable += '\x00'; + + // Build the symbol arrays and the string table, but only for non-local + // symbols. + // + // The particular order that we collect the symbols and create the string + // table, then sort the symbols is chosen to match 'as'. Even though it + // doesn't matter for correctness, this is important for letting us diff .o + // files. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(it)) + continue; + + if (!it->isExternal() && !Symbol.isUndefined()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; + } + + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + if (Symbol.isUndefined()) { + MSD.SectionIndex = 0; + UndefinedSymbolData.push_back(MSD); + } else if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + ExternalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + ExternalSymbolData.push_back(MSD); + } + } + + // Now add the data for local symbols. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + // Ignore non-linker visible symbols. + if (!Asm.isSymbolLinkerVisible(it)) + continue; + + if (it->isExternal() || Symbol.isUndefined()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; + } + + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + LocalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + LocalSymbolData.push_back(MSD); + } + } + + // External and undefined symbols are required to be in lexicographic order. + std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); + std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); + + // Set the symbol indices. + Index = 0; + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + LocalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + ExternalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + UndefinedSymbolData[i].SymbolData->setIndex(Index++); + + // The string table is padded to a multiple of 4. + while (StringTable.size() % 4) + StringTable += '\x00'; + } + + void ExecutePostLayoutBinding(MCAssembler &Asm) { + // Create symbol data for any indirect symbols. + BindIndirectSymbols(Asm); + + // Compute symbol table information and bind symbol indices. + ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, + UndefinedSymbolData); + } + + void WriteObject(const MCAssembler &Asm) { + unsigned NumSections = Asm.size(); + + // The section data starts after the header, the segment load command (and + // section headers) and the symbol table. + unsigned NumLoadCommands = 1; + uint64_t LoadCommandsSize = Is64Bit ? + SegmentLoadCommand64Size + NumSections * Section64Size : + SegmentLoadCommand32Size + NumSections * Section32Size; + + // Add the symbol table load command sizes, if used. + unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + + UndefinedSymbolData.size(); + if (NumSymbols) { + NumLoadCommands += 2; + LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize; + } + + // Compute the total size of the section data, as well as its file size and + // vm size. + uint64_t SectionDataStart = (Is64Bit ? 
Header64Size : Header32Size) + + LoadCommandsSize; + uint64_t SectionDataSize = 0; + uint64_t SectionDataFileSize = 0; + uint64_t VMSize = 0; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionData &SD = *it; + + VMSize = std::max(VMSize, SD.getAddress() + SD.getSize()); + + if (Asm.getBackend().isVirtualSection(SD.getSection())) + continue; + + SectionDataSize = std::max(SectionDataSize, + SD.getAddress() + SD.getSize()); + SectionDataFileSize = std::max(SectionDataFileSize, + SD.getAddress() + SD.getFileSize()); + } + + // The section data is padded to 4 bytes. + // + // FIXME: Is this machine dependent? + unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); + SectionDataFileSize += SectionDataPadding; + + // Write the prolog, starting with the header and load command... + WriteHeader(NumLoadCommands, LoadCommandsSize, + Asm.getSubsectionsViaSymbols()); + WriteSegmentLoadCommand(NumSections, VMSize, + SectionDataStart, SectionDataSize); + + // ... and then the section headers. + uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + std::vector &Relocs = Relocations[it]; + unsigned NumRelocs = Relocs.size(); + uint64_t SectionStart = SectionDataStart + it->getAddress(); + WriteSection(Asm, *it, SectionStart, RelocTableEnd, NumRelocs); + RelocTableEnd += NumRelocs * RelocationInfoSize; + } + + // Write the symbol table load command, if used. + if (NumSymbols) { + unsigned FirstLocalSymbol = 0; + unsigned NumLocalSymbols = LocalSymbolData.size(); + unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; + unsigned NumExternalSymbols = ExternalSymbolData.size(); + unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; + unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); + unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); + unsigned NumSymTabSymbols = + NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; + uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; + uint64_t IndirectSymbolOffset = 0; + + // If used, the indirect symbols are written after the section data. + if (NumIndirectSymbols) + IndirectSymbolOffset = RelocTableEnd; + + // The symbol table is written after the indirect symbol data. + uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize; + + // The string table is written after symbol table. + uint64_t StringTableOffset = + SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? Nlist64Size : + Nlist32Size); + WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, + StringTableOffset, StringTable.size()); + + WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, + FirstExternalSymbol, NumExternalSymbols, + FirstUndefinedSymbol, NumUndefinedSymbols, + IndirectSymbolOffset, NumIndirectSymbols); + } + + // Write the actual section data. + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) + Asm.WriteSectionData(it, Writer); + + // Write the extra padding. + WriteZeros(SectionDataPadding); + + // Write the relocation entries. + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + // Write the section relocation entries, in reverse order to match 'as' + // (approximately, the exact algorithm is more complicated than this). 
+ std::vector &Relocs = Relocations[it]; + for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { + Write32(Relocs[e - i - 1].Word0); + Write32(Relocs[e - i - 1].Word1); + } + } + + // Write the symbol table data, if used. + if (NumSymbols) { + // Write the indirect symbol entries. + for (MCAssembler::const_indirect_symbol_iterator + it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + // Indirect symbols in the non lazy symbol pointer section have some + // special handling. + const MCSectionMachO &Section = + static_cast(it->SectionData->getSection()); + if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) { + // If this symbol is defined and internal, mark it as such. + if (it->Symbol->isDefined() && + !Asm.getSymbolData(*it->Symbol).isExternal()) { + uint32_t Flags = ISF_Local; + if (it->Symbol->isAbsolute()) + Flags |= ISF_Absolute; + Write32(Flags); + continue; + } + } + + Write32(Asm.getSymbolData(*it->Symbol).getIndex()); + } + + // FIXME: Check that offsets match computed ones. + + // Write the symbol table entries. + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + WriteNlist(LocalSymbolData[i]); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + WriteNlist(ExternalSymbolData[i]); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + WriteNlist(UndefinedSymbolData[i]); + + // Write the string table. + OS << StringTable.str(); + } + } +}; + +} + +MachObjectWriter::MachObjectWriter(raw_ostream &OS, + bool Is64Bit, + bool IsLittleEndian) + : MCObjectWriter(OS, IsLittleEndian) +{ + Impl = new MachObjectWriterImpl(this, Is64Bit); +} + +MachObjectWriter::~MachObjectWriter() { + delete (MachObjectWriterImpl*) Impl; +} + +void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { + ((MachObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm); +} + +void MachObjectWriter::RecordRelocation(const MCAssembler &Asm, + const MCDataFragment &Fragment, + const MCAsmFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Fragment, Fixup, + Target, FixedValue); +} + +void MachObjectWriter::WriteObject(const MCAssembler &Asm) { + ((MachObjectWriterImpl*) Impl)->WriteObject(Asm); +} diff --git a/lib/MC/TargetAsmBackend.cpp b/lib/MC/TargetAsmBackend.cpp index 918d2727daa6..bbfddbe9e71f 100644 --- a/lib/MC/TargetAsmBackend.cpp +++ b/lib/MC/TargetAsmBackend.cpp @@ -11,7 +11,10 @@ using namespace llvm; TargetAsmBackend::TargetAsmBackend(const Target &T) - : TheTarget(T) + : TheTarget(T), + HasAbsolutizedSet(false), + HasReliableSymbolDifference(false), + HasScatteredSymbols(false) { } diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index 954dc77dff1e..3f467fe1b692 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include @@ -130,6 +131,15 @@ bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{ return memcmp(&Bits[0], &RHS.Bits[0], Bits.size()*sizeof(Bits[0])) == 0; } +/// Intern - Copy this node's data to a memory region allocated from the +/// given allocator and return a FoldingSetNodeIDRef describing the +/// interned data. 
+FoldingSetNodeIDRef +FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { + unsigned *New = Allocator.Allocate<unsigned>(Bits.size()); + std::uninitialized_copy(Bits.begin(), Bits.end(), New); + return FoldingSetNodeIDRef(New, Bits.size()); +} //===----------------------------------------------------------------------===// /// Helper functions for FoldingSetImpl. diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc index c8ec68aab17f..56bf9e76e60a 100644 --- a/lib/System/Unix/Signals.inc +++ b/lib/System/Unix/Signals.inc @@ -39,8 +39,8 @@ static SmartMutex<true> SignalsMutex; /// InterruptFunction - The function to call if ctrl-c is pressed. static void (*InterruptFunction)() = 0; -static std::vector<sys::Path> *FilesToRemove = 0; -static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0; +static std::vector<sys::Path> FilesToRemove; +static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun; // IntSigs - Signals that may interrupt the program at any time. static const int IntSigs[] = { @@ -126,11 +126,10 @@ static RETSIGTYPE SignalHandler(int Sig) { sigprocmask(SIG_UNBLOCK, &SigMask, 0); SignalsMutex.acquire(); - if (FilesToRemove != 0) - while (!FilesToRemove->empty()) { - FilesToRemove->back().eraseFromDisk(true); - FilesToRemove->pop_back(); - } + while (!FilesToRemove.empty()) { + FilesToRemove.back().eraseFromDisk(true); + FilesToRemove.pop_back(); + } if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) { if (InterruptFunction) { @@ -149,9 +148,8 @@ static RETSIGTYPE SignalHandler(int Sig) { SignalsMutex.release(); // Otherwise if it is a fault (like SEGV) run any handler. - if (CallBacksToRun) - for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i) - (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second); + for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i) + CallBacksToRun[i].first(CallBacksToRun[i].second); } @@ -167,10 +165,7 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) { bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) { SignalsMutex.acquire(); - if (FilesToRemove == 0) - FilesToRemove = new std::vector<sys::Path>(); - - FilesToRemove->push_back(Filename); + FilesToRemove.push_back(Filename); SignalsMutex.release(); @@ -182,9 +177,7 @@ bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename, /// to the process. The handler can have a cookie passed to it to identify /// what instance of the handler it is. void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) { - if (CallBacksToRun == 0) - CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >(); - CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie)); + CallBacksToRun.push_back(std::make_pair(FnPtr, Cookie)); RegisterHandlers(); } diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index ddeb1b994e84..ea62c3330e64 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -35,6 +35,10 @@ namespace ARM_AM { add = '+', sub = '-' }; + static inline const char *getAddrOpcStr(AddrOpc Op) { + return Op == sub ? "-" : ""; + } + static inline const char *getShiftOpcStr(ShiftOpc Op) { switch (Op) { default: assert(0 && "Unknown shift opc!"); @@ -78,16 +82,6 @@ namespace ARM_AM { } } - static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) { - switch (Mode) { - default: assert(0 && "Unknown addressing sub-mode!"); - case ARM_AM::ia: return isLD ? "fd" : "ea"; - case ARM_AM::ib: return isLD ? "ed" : "fa"; - case ARM_AM::da: return isLD ? "fa" : "ed"; - case ARM_AM::db: return isLD ?
"ea" : "fd"; - } - } - /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits. /// static inline unsigned rotr32(unsigned Val, unsigned Amt) { @@ -473,20 +467,13 @@ namespace ARM_AM { // IB - Increment before // DA - Decrement after // DB - Decrement before - // - // If the 4th bit (writeback)is set, then the base register is updated after - // the memory transfer. static inline AMSubMode getAM4SubMode(unsigned Mode) { return (AMSubMode)(Mode & 0x7); } - static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) { - return (int)SubMode | ((int)WB << 3); - } - - static inline bool getAM4WBFlag(unsigned Mode) { - return (Mode >> 3) & 1; + static inline unsigned getAM4ModeImm(AMSubMode SubMode) { + return (int)SubMode; } //===--------------------------------------------------------------------===// @@ -501,9 +488,9 @@ namespace ARM_AM { // operation in bit 8 and the immediate in bits 0-7. // // This is also used for FP load/store multiple ops. The second operand - // encodes the writeback mode in bit 8 and the number of registers (or 2 - // times the number of registers for DPR ops) in bits 0-7. In addition, - // bits 9-11 encode one of the following two sub-modes: + // encodes the number of registers (or 2 times the number of registers + // for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the + // following two sub-modes: // // IA - Increment after // DB - Decrement before @@ -522,17 +509,13 @@ namespace ARM_AM { /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and /// VSTM instructions. - static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB, - unsigned char Offset) { + static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) { assert((SubMode == ia || SubMode == db) && "Illegal addressing mode 5 sub-mode!"); - return ((int)SubMode << 9) | ((int)WB << 8) | Offset; + return ((int)SubMode << 8) | Offset; } static inline AMSubMode getAM5SubMode(unsigned AM5Opc) { - return (AMSubMode)((AM5Opc >> 9) & 0x7); - } - static inline bool getAM5WBFlag(unsigned AM5Opc) { - return ((AM5Opc >> 8) & 1); + return (AMSubMode)((AM5Opc >> 8) & 0x7); } //===--------------------------------------------------------------------===// @@ -541,23 +524,11 @@ namespace ARM_AM { // // This is used for NEON load / store instructions. // - // addrmode6 := reg with optional writeback and alignment + // addrmode6 := reg with optional alignment // - // This is stored in four operands [regaddr, regupdate, opc, align]. The - // first is the address register. The second register holds the value of - // a post-access increment for writeback or reg0 if no writeback or if the - // writeback increment is the size of the memory access. The third - // operand encodes whether there is writeback to the address register. The - // fourth operand is the value of the alignment specifier to use or zero if - // no explicit alignment. - - static inline unsigned getAM6Opc(bool WB = false) { - return (int)WB; - } - - static inline bool getAM6WBFlag(unsigned Mode) { - return Mode & 1; - } + // This is stored in two operands [regaddr, align]. The first is the + // address register. The second operand is the value of the alignment + // specifier to use or zero if no explicit alignment. 
} // end namespace ARM_AM } // end namespace llvm diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 8e537d8b6280..e6ea03aa4aab 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -650,39 +650,49 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, if (SrcRC == ARM::tGPRRegisterClass) SrcRC = ARM::GPRRegisterClass; - if (DestRC != SrcRC) { - if (DestRC->getSize() != SrcRC->getSize()) - return false; + // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies. + if (DestRC == ARM::DPR_8RegisterClass) + DestRC = ARM::DPR_VFP2RegisterClass; + if (SrcRC == ARM::DPR_8RegisterClass) + SrcRC = ARM::DPR_VFP2RegisterClass; - // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies. - // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies. - if (DestRC->getSize() != 8 && DestRC->getSize() != 16) - return false; - } + // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies. + if (DestRC == ARM::QPR_VFP2RegisterClass || + DestRC == ARM::QPR_8RegisterClass) + DestRC = ARM::QPRRegisterClass; + if (SrcRC == ARM::QPR_VFP2RegisterClass || + SrcRC == ARM::QPR_8RegisterClass) + SrcRC = ARM::QPRRegisterClass; + + // Disallow copies of unequal sizes. + if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize()) + return false; if (DestRC == ARM::GPRRegisterClass) { - AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), - DestReg).addReg(SrcReg))); - } else if (DestRC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVS), DestReg) - .addReg(SrcReg)); - } else if (DestRC == ARM::DPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg) - .addReg(SrcReg)); - } else if (DestRC == ARM::DPR_VFP2RegisterClass || - DestRC == ARM::DPR_8RegisterClass || - SrcRC == ARM::DPR_VFP2RegisterClass || - SrcRC == ARM::DPR_8RegisterClass) { - // Always use neon reg-reg move if source or dest is NEON-only regclass. - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVDneon), - DestReg).addReg(SrcReg)); - } else if (DestRC == ARM::QPRRegisterClass || - DestRC == ARM::QPR_VFP2RegisterClass || - DestRC == ARM::QPR_8RegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVQ), - DestReg).addReg(SrcReg)); + if (SrcRC == ARM::SPRRegisterClass) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVRS), DestReg) + .addReg(SrcReg)); + else + AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), + DestReg).addReg(SrcReg))); } else { - return false; + unsigned Opc; + + if (DestRC == ARM::SPRRegisterClass) + Opc = (SrcRC == ARM::GPRRegisterClass ? ARM::VMOVSR : ARM::VMOVS); + else if (DestRC == ARM::DPRRegisterClass) + Opc = ARM::VMOVD; + else if (DestRC == ARM::DPR_VFP2RegisterClass || + SrcRC == ARM::DPR_VFP2RegisterClass) + // Always use neon reg-reg move if source or dest is NEON-only regclass. 
+ Opc = ARM::VMOVDneon; + else if (DestRC == ARM::QPRRegisterClass) + Opc = ARM::VMOVQ; + else + return false; + + AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg)); } return true; @@ -727,10 +737,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates - if (Align >= 16 - && (getRegisterInfo().canRealignStack(MF))) { + if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addFrameIndex(FI).addImm(128) .addMemOperand(MMO) .addReg(SrcReg, getKillRegState(isKill))); } else { @@ -780,7 +789,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addFrameIndex(FI).addImm(128) .addMemOperand(MMO)); } else { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 767d5ec92651..292c4984dd63 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -93,34 +93,34 @@ namespace ARMII { StMiscFrm = 9 << FormShift, LdStMulFrm = 10 << FormShift, - LdStExFrm = 28 << FormShift, + LdStExFrm = 11 << FormShift, // Miscellaneous arithmetic instructions - ArithMiscFrm = 11 << FormShift, + ArithMiscFrm = 12 << FormShift, // Extend instructions - ExtFrm = 12 << FormShift, + ExtFrm = 13 << FormShift, // VFP formats - VFPUnaryFrm = 13 << FormShift, - VFPBinaryFrm = 14 << FormShift, - VFPConv1Frm = 15 << FormShift, - VFPConv2Frm = 16 << FormShift, - VFPConv3Frm = 17 << FormShift, - VFPConv4Frm = 18 << FormShift, - VFPConv5Frm = 19 << FormShift, - VFPLdStFrm = 20 << FormShift, - VFPLdStMulFrm = 21 << FormShift, - VFPMiscFrm = 22 << FormShift, + VFPUnaryFrm = 14 << FormShift, + VFPBinaryFrm = 15 << FormShift, + VFPConv1Frm = 16 << FormShift, + VFPConv2Frm = 17 << FormShift, + VFPConv3Frm = 18 << FormShift, + VFPConv4Frm = 19 << FormShift, + VFPConv5Frm = 20 << FormShift, + VFPLdStFrm = 21 << FormShift, + VFPLdStMulFrm = 22 << FormShift, + VFPMiscFrm = 23 << FormShift, // Thumb format - ThumbFrm = 23 << FormShift, + ThumbFrm = 24 << FormShift, // NEON format - NEONFrm = 24 << FormShift, - NEONGetLnFrm = 25 << FormShift, - NEONSetLnFrm = 26 << FormShift, - NEONDupFrm = 27 << FormShift, + NEONFrm = 25 << FormShift, + NEONGetLnFrm = 26 << FormShift, + NEONSetLnFrm = 27 << FormShift, + NEONDupFrm = 28 << FormShift, //===------------------------------------------------------------------===// // Misc flags. 
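Editor's aside (illustrative, not part of the patch): the instruction-format values renumbered above are only ever consumed by masking TSFlags, so the renumbering is safe as long as every format keeps a distinct value that fits under FormMask. A minimal sketch of the decode pattern, using the same names the code emitter relies on further down in this patch; the helper name here is hypothetical:

  // Hypothetical helper; mirrors 'TID.TSFlags & ARMII::FormMask' as used in
  // ARMCodeEmitter.cpp below. Assumes ARMBaseInstrInfo.h is included.
  static bool isVFPLoadStoreMultiple(const TargetInstrDesc &TID) {
    unsigned Form = TID.TSFlags & ARMII::FormMask;
    return Form == ARMII::VFPLdStMulFrm;
  }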
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 11e1c4862b41..b380c954d606 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -80,7 +80,7 @@ unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, case D23: return 23; case D24: return 24; case D25: return 25; - case D26: return 27; + case D26: return 26; case D27: return 27; case D28: return 28; case D29: return 29; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 334c820b91cb..e7aa0c86d40e 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -55,12 +55,12 @@ namespace { const std::vector *MCPEs; const std::vector *MJTEs; bool IsPIC; - + void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } - + static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) @@ -68,7 +68,7 @@ namespace { TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} - + /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for /// machine instructions. @@ -163,7 +163,7 @@ namespace { char ARMCodeEmitter::ID = 0; -/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM +/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM /// code to the specified MCE object. FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE) { @@ -617,8 +617,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { } } -unsigned ARMCodeEmitter::getMachineSoRegOpValue( - const MachineInstr &MI, +unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, const TargetInstrDesc &TID, const MachineOperand &MO, unsigned OpIdx) { @@ -690,7 +689,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { } unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, - const TargetInstrDesc &TID) const { + const TargetInstrDesc &TID) const { for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){ const MachineOperand &MO = MI.getOperand(i-1); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) @@ -699,8 +698,7 @@ unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, return 0; } -void ARMCodeEmitter::emitDataProcessingInstruction( - const MachineInstr &MI, +void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); @@ -765,8 +763,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction( emitWordLE(Binary); } -void ARMCodeEmitter::emitLoadStoreInstruction( - const MachineInstr &MI, +void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); @@ -841,7 +838,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction( } void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRn) { + unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); unsigned Form = TID.TSFlags & ARMII::FormMask; bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; @@ -950,7 +947,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm())); // Set bit W(21) 
- if (ARM_AM::getAM4WBFlag(MO.getImm())) + if (IsUpdating) Binary |= 0x1 << ARMII::W_BitShift; // Set registers @@ -1238,8 +1235,7 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -void ARMCodeEmitter::emitVFPConversionInstruction( - const MachineInstr &MI) { +void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); unsigned Form = TID.TSFlags & ARMII::FormMask; @@ -1329,8 +1325,8 @@ void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction( - const MachineInstr &MI) { +void +ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0; @@ -1353,7 +1349,7 @@ void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction( Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm())); // Set bit W(21) - if (ARM_AM::getAM5WBFlag(MO.getImm())) + if (IsUpdating) Binary |= 0x1 << ARMII::W_BitShift; // First register is encoded in Dd. diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 013e00ade7ff..71207c811e45 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -80,8 +80,7 @@ public: SDValue &Mode); bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); - bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update, - SDValue &Opc, SDValue &Align); + bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align); bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Label); @@ -502,12 +501,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N, } bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N, - SDValue &Addr, SDValue &Update, - SDValue &Opc, SDValue &Align) { + SDValue &Addr, SDValue &Align) { Addr = N; - // Default to no writeback. - Update = CurDAG->getRegister(0, MVT::i32); - Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); // Default to no alignment. Align = CurDAG->getTargetConstant(0, MVT::i32); return true; @@ -1030,8 +1025,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1055,14 +1050,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; + const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; std::vector ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1070,11 +1064,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, // Quad registers are directly supported for VLD2, // loading 2 pairs of D regs. 
unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; + const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; std::vector ResTys(4, VT); ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); Chain = SDValue(VLd, 4); // Combine the even and odd subregs to produce the result. @@ -1086,25 +1079,21 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, // Otherwise, quad registers are loaded with two separate instructions, // where one loads the even registers and the other loads the odd registers. - // Enable writeback to the address register. - MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); - std::vector ResTys(NumVecs, RegVT); ResTys.push_back(MemAddr.getValueType()); ResTys.push_back(MVT::Other); // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7); + const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6); Chain = SDValue(VLdA, NumVecs+1); // Load the odd subregs. Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, - Align, Pred, PredReg, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7); + const SDValue OpsB[] = { SDValue(VLdA, NumVecs), + Align, Reg0, Pred, Reg0, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); Chain = SDValue(VLdB, NumVecs+1); // Combine the even and odd subregs to produce the result. 
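Editor's aside (illustrative, not part of the patch): with writeback dropped from addrmode6, the NEON load/store machine nodes built in these hunks shrink from the old seven-operand form { MemAddr, MemUpdate, MemOpc, Align, ..., Pred, PredReg, Chain } to five operands for the D-register case. A sketch using the same variable names as the surrounding code:

  // D-register VLD after this change: address and alignment, then the
  // predicate operands and the chain.
  const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
  SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);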
@@ -1123,8 +1112,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1148,12 +1137,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector Ops; + SmallVector Ops; Ops.push_back(MemAddr); - Ops.push_back(MemUpdate); - Ops.push_back(MemOpc); Ops.push_back(Align); if (is64BitVector) { @@ -1161,9 +1148,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(N->getOperand(Vec+3)); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1178,40 +1165,37 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, N->getOperand(Vec+3))); } Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9); } // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. - // Enable writeback to the address register. - MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + Ops.push_back(Reg0); // post-access address offset // Store the even subregs. for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, N->getOperand(Vec+3))); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+7); + MVT::Other, Ops.data(), NumVecs+6); Chain = SDValue(VStA, 1); // Store the odd subregs. 
Ops[0] = SDValue(VStA, 0); // MemAddr for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, N->getOperand(Vec+3)); - Ops[NumVecs+4] = Pred; - Ops[NumVecs+5] = PredReg; - Ops[NumVecs+6] = Chain; + Ops[NumVecs+5] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+7); + MVT::Other, Ops.data(), NumVecs+6); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1224,8 +1208,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1259,12 +1243,10 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector Ops; + SmallVector Ops; Ops.push_back(MemAddr); - Ops.push_back(MemUpdate); - Ops.push_back(MemOpc); Ops.push_back(Align); unsigned Opc = 0; @@ -1287,16 +1269,16 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } Ops.push_back(getI32Imm(Lane)); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); Ops.push_back(Chain); if (!IsLoad) - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+8); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6); std::vector ResTys(NumVecs, RegVT); ResTys.push_back(MVT::Other); SDNode *VLdLn = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+8); + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6); // For a 64-bit vector load to D registers, nothing more needs to be done. 
if (is64BitVector) return VLdLn; @@ -1859,37 +1841,45 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vld3: { unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16, ARM::VLD3d32, ARM::VLD3d64 }; - unsigned QOpcodes0[] = { ARM::VLD3q8a, ARM::VLD3q16a, ARM::VLD3q32a }; - unsigned QOpcodes1[] = { ARM::VLD3q8b, ARM::VLD3q16b, ARM::VLD3q32b }; + unsigned QOpcodes0[] = { ARM::VLD3q8_UPD, + ARM::VLD3q16_UPD, + ARM::VLD3q32_UPD }; + unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD, + ARM::VLD3q16odd_UPD, + ARM::VLD3q32odd_UPD }; return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4: { unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16, ARM::VLD4d32, ARM::VLD4d64 }; - unsigned QOpcodes0[] = { ARM::VLD4q8a, ARM::VLD4q16a, ARM::VLD4q32a }; - unsigned QOpcodes1[] = { ARM::VLD4q8b, ARM::VLD4q16b, ARM::VLD4q32b }; + unsigned QOpcodes0[] = { ARM::VLD4q8_UPD, + ARM::VLD4q16_UPD, + ARM::VLD4q32_UPD }; + unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD, + ARM::VLD4q16odd_UPD, + ARM::VLD4q32odd_UPD }; return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld2lane: { unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a }; - unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b }; + unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 }; + unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd }; return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld3lane: { unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a }; - unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b }; + unsigned QOpcodes0[] = { ARM::VLD3LNq16, ARM::VLD3LNq32 }; + unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd }; return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4lane: { unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a }; - unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b }; + unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 }; + unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd }; return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); } @@ -1903,37 +1893,45 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vst3: { unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16, ARM::VST3d32, ARM::VST3d64 }; - unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a }; - unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b }; + unsigned QOpcodes0[] = { ARM::VST3q8_UPD, + ARM::VST3q16_UPD, + ARM::VST3q32_UPD }; + unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD, + ARM::VST3q16odd_UPD, + ARM::VST3q32odd_UPD }; return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4: { unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16, ARM::VST4d32, ARM::VST4d64 }; - unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a }; - unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b }; + unsigned QOpcodes0[] = { ARM::VST4q8_UPD, + ARM::VST4q16_UPD, + ARM::VST4q32_UPD }; + unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD, + ARM::VST4q16odd_UPD, + ARM::VST4q32odd_UPD }; return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst2lane: { unsigned DOpcodes[] = { ARM::VST2LNd8, 
ARM::VST2LNd16, ARM::VST2LNd32 }; - unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a }; - unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b }; + unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 }; + unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd }; return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst3lane: { unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 }; - unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a }; - unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b }; + unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 }; + unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd }; return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4lane: { unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 }; - unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a }; - unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b }; + unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 }; + unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd }; return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); } } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 8f208432ab7e..0d0a004c2840 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -436,9 +436,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } // Special handling for half-precision FP. - if (Subtarget->hasVFP3() && Subtarget->hasFP16()) { - setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Custom); - setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Custom); + if (!Subtarget->hasFP16()) { + setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); + setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); } } @@ -499,8 +499,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::FTOUI: return "ARMISD::FTOUI"; case ARMISD::SITOF: return "ARMISD::SITOF"; case ARMISD::UITOF: return "ARMISD::UITOF"; - case ARMISD::F16_TO_F32: return "ARMISD::F16_TO_F32"; - case ARMISD::F32_TO_F16: return "ARMISD::F32_TO_F16"; case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; @@ -1987,9 +1985,6 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { default: assert(0 && "Invalid opcode!"); - case ISD::FP32_TO_FP16: - Opc = ARMISD::F32_TO_F16; - break; case ISD::FP_TO_SINT: Opc = ARMISD::FTOSI; break; @@ -2009,9 +2004,6 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { default: assert(0 && "Invalid opcode!"); - case ISD::FP16_TO_FP32: - Opc = ARMISD::F16_TO_F32; - break; case ISD::SINT_TO_FP: Opc = ARMISD::SITOF; break; @@ -3078,10 +3070,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); - case ISD::FP16_TO_FP32: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); - case ISD::FP32_TO_FP16: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 
d7b2ba3b5c72..f8f8adc70af8 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -59,8 +59,6 @@ namespace llvm { FTOUI, // FP to uint within a FP register. SITOF, // sint to FP within a FP register. UITOF, // uint to FP within a FP register. - F16_TO_F32, // Half FP to single FP within a FP register. - F32_TO_F16, // Single FP to half FP within a FP register. SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out. SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 258a96b9216e..4f6f05d40943 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -15,8 +15,8 @@ // Format specifies the encoding used by the instruction. This is part of the // ad-hoc solution used to emit machine instruction encodings by our machine // code emitter. -class Format val> { - bits<5> Value = val; +class Format val> { + bits<6> Value = val; } def Pseudo : Format<0>; @@ -33,32 +33,34 @@ def LdMiscFrm : Format<8>; def StMiscFrm : Format<9>; def LdStMulFrm : Format<10>; -def LdStExFrm : Format<28>; +def LdStExFrm : Format<11>; -def ArithMiscFrm : Format<11>; -def ExtFrm : Format<12>; +def ArithMiscFrm : Format<12>; +def ExtFrm : Format<13>; -def VFPUnaryFrm : Format<13>; -def VFPBinaryFrm : Format<14>; -def VFPConv1Frm : Format<15>; -def VFPConv2Frm : Format<16>; -def VFPConv3Frm : Format<17>; -def VFPConv4Frm : Format<18>; -def VFPConv5Frm : Format<19>; -def VFPLdStFrm : Format<20>; -def VFPLdStMulFrm : Format<21>; -def VFPMiscFrm : Format<22>; +def VFPUnaryFrm : Format<14>; +def VFPBinaryFrm : Format<15>; +def VFPConv1Frm : Format<16>; +def VFPConv2Frm : Format<17>; +def VFPConv3Frm : Format<18>; +def VFPConv4Frm : Format<19>; +def VFPConv5Frm : Format<20>; +def VFPLdStFrm : Format<21>; +def VFPLdStMulFrm : Format<22>; +def VFPMiscFrm : Format<23>; -def ThumbFrm : Format<23>; +def ThumbFrm : Format<24>; -def NEONFrm : Format<24>; -def NEONGetLnFrm : Format<25>; -def NEONSetLnFrm : Format<26>; -def NEONDupFrm : Format<27>; +def NEONFrm : Format<25>; +def NEONGetLnFrm : Format<26>; +def NEONSetLnFrm : Format<27>; +def NEONDupFrm : Format<28>; def MiscFrm : Format<29>; def ThumbMiscFrm : Format<30>; +def NLdStFrm : Format<31>; + // Misc flags. // the instruction has a Rn register operand. @@ -71,7 +73,7 @@ class UnaryDP { bit isUnaryDataProc = 1; } class Xform16Bit { bit canXformTo16Bit = 1; } //===----------------------------------------------------------------------===// -// ARM Instruction flags. These need to match ARMInstrInfo.h. +// ARM Instruction flags. These need to match ARMBaseInstrInfo.h. // // Addressing mode. 
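The ARMInstrFormats.td hunk above widens the instruction format field from 5 to 6 bits so that a 32nd format, NLdStFrm, can be added for the NEON load/store instructions, and renumbers the later formats to keep the values contiguous. With the template parameter spelled out (the bits<6> type is inferred here from the "bits<6> Value = val" body rather than copied from the patch text), the new class and the added format read roughly as:

    class Format<bits<6> val> {   // was bits<5>; 6 bits allow format values 0-63
      bits<6> Value = val;
    }
    def NLdStFrm : Format<31>;    // new format for the NEON vector load/store instructions
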
@@ -183,7 +185,7 @@ class InstTemplate IndexModeBits = IM.Value; Format F = f; - bits<5> Form = F.Value; + bits<6> Form = F.Value; Domain D = d; bits<2> Dom = D.Value; @@ -229,7 +231,7 @@ class I pattern> : InstARM { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list Predicates = [IsARM]; @@ -257,7 +259,7 @@ class sI pattern> : InstARM { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); + let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); let Pattern = pattern; list Predicates = [IsARM]; @@ -1007,8 +1009,8 @@ class Thumb1sI pattern> : InstThumb { - let OutOperandList = !con(oops, (ops s_cc_out:$s)); - let InOperandList = !con(iops, (ops pred:$p)); + let OutOperandList = !con(oops, (outs s_cc_out:$s)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); let Pattern = pattern; list Predicates = [IsThumb1Only]; @@ -1030,7 +1032,7 @@ class Thumb1pI pattern> : InstThumb { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list Predicates = [IsThumb1Only]; @@ -1109,7 +1111,7 @@ class Thumb2I pattern> : InstARM { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list Predicates = [IsThumb2]; @@ -1125,7 +1127,7 @@ class Thumb2sI pattern> : InstARM { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); + let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); let Pattern = pattern; list Predicates = [IsThumb2]; @@ -1209,7 +1211,7 @@ class T2Iidxldst opcod, bit load, bit pre, string opc, string asm, string cstr, list pattern> : InstARM { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list Predicates = [IsThumb2]; @@ -1265,7 +1267,7 @@ class VFPI pattern> : InstARM { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list Predicates = [HasVFP2]; @@ -1464,11 +1466,12 @@ class AVConv5I opcod1, bits<4> opcod2, dag oops, dag iops, // ARM NEON Instruction templates. 
// -class NeonI pattern> - : InstARM { +class NeonI pattern> + : InstARM { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat( !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)), !strconcat("\t", asm)); @@ -1481,7 +1484,7 @@ class NeonXI pattern> : InstARM { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm)); let Pattern = pattern; list Predicates = [HasNEON]; @@ -1502,8 +1505,8 @@ class NI4 op21_20, bits<4> op11_8, bits<4> op7_4, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, string cstr, list pattern> - : NeonI { + : NeonI { let Inst{31-24} = 0b11110100; let Inst{23} = op23; let Inst{21-20} = op21_20; @@ -1513,7 +1516,7 @@ class NLdSt op21_20, bits<4> op11_8, bits<4> op7_4, class NDataI pattern> - : NeonI { let Inst{31-25} = 0b1111001; } @@ -1621,7 +1624,7 @@ class NVLaneOp opcod1, bits<4> opcod2, bits<2> opcod3, let Inst{4} = 1; let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat( !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)), !strconcat("\t", asm)); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 3fc37dae4e4e..26a280697b90 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -62,11 +62,14 @@ def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; @@ -282,7 +285,7 @@ def pclabel : Operand { // shifter_operand operands: so_reg and so_imm. def so_reg : Operand, // reg reg imm - ComplexPattern { let PrintMethod = "printSORegOperand"; let MIOperandInfo = (ops GPR, GPR, i32imm); @@ -392,9 +395,14 @@ def addrmode5 : Operand, // addrmode6 := reg with optional writeback // def addrmode6 : Operand, - ComplexPattern { + ComplexPattern { let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm); + let MIOperandInfo = (ops GPR:$addr, i32imm); +} + +def am6offset : Operand { + let PrintMethod = "printAddrMode6OffsetOperand"; + let MIOperandInfo = (ops GPR); } // addrmodepc := pc + reg @@ -909,7 +917,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, LdStMulFrm, IIC_Br, - "ldm${addr:submode}${p}\t$addr, $dsts", + "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>; // On non-Darwin platforms R9 is callee-saved. 
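The addrmode6 change just above drops the write-back and sub-opcode operands from the NEON address operand and introduces a separate am6offset operand (a single GPR, with reg0 apparently standing for a plain post-increment). Base-register updates are now expressed by the dedicated _UPD instruction variants defined later in this patch, which pair the two operands and tie the updated base to a GPR result. As a rough sketch with the template parameter filled in (the bits<4> type is inferred from uses such as VLD1DWB<0b0000, "8">, not shown explicitly in the patch text):

    class VLD1DWB<bits<4> op7_4, string Dt>
      : NLdSt<0, 0b10, 0b0111, op7_4,
              (outs DPR:$dst, GPR:$wb),                  // loaded D register plus updated base
              (ins addrmode6:$addr, am6offset:$offset),  // base/alignment plus optional offset register
              IIC_VLD1, "vld1", Dt, "\\{$dst\\}, $addr$offset",
              "$addr.addr = $wb", []>;                   // e.g. "vld1.32 {d16}, [r0]!"

The same shape (an am6offset input, a GPR:$wb output, and a "$addr.addr = $wb" constraint) recurs in all the VLDn/VSTn _UPD definitions below, and the ldm/stm asm strings in this hunk gain an explicit "!" on the base register for their write-back forms.
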
@@ -1354,7 +1362,7 @@ def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, LdStMulFrm, IIC_iLoadm, - "ldm${addr:submode}${p}\t$addr, $dsts", + "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>; } // mayLoad, hasExtraDefRegAllocReq @@ -1367,7 +1375,7 @@ def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p, def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, LdStMulFrm, IIC_iStorem, - "stm${addr:submode}${p}\t$addr, $srcs", + "stm${addr:submode}${p}\t$addr!, $srcs", "$addr.addr = $wb", []>; } // mayStore, hasExtraSrcRegAllocReq diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 8fee6fa9527a..c977cc3f5d64 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -138,214 +138,360 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, } // VLD1 : Vector Load (multiple single elements) -class VLD1D op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> +class VLD1D op7_4, string Dt, ValueType Ty> : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - OpcodeStr, Dt, "\\{$dst\\}, $addr", "", - [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; -class VLD1Q op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> + "vld1", Dt, "\\{$dst\\}, $addr", "", + [(set DPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>; +class VLD1Q op7_4, string Dt, ValueType Ty> : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - OpcodeStr, Dt, "${dst:dregpair}, $addr", "", - [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; + "vld1", Dt, "${dst:dregpair}, $addr", "", + [(set QPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>; -def VLD1d8 : VLD1D<0b0000, "vld1", "8", v8i8, int_arm_neon_vld1>; -def VLD1d16 : VLD1D<0b0100, "vld1", "16", v4i16, int_arm_neon_vld1>; -def VLD1d32 : VLD1D<0b1000, "vld1", "32", v2i32, int_arm_neon_vld1>; -def VLD1df : VLD1D<0b1000, "vld1", "32", v2f32, int_arm_neon_vld1>; -def VLD1d64 : VLD1D<0b1100, "vld1", "64", v1i64, int_arm_neon_vld1>; +def VLD1d8 : VLD1D<0b0000, "8", v8i8>; +def VLD1d16 : VLD1D<0b0100, "16", v4i16>; +def VLD1d32 : VLD1D<0b1000, "32", v2i32>; +def VLD1df : VLD1D<0b1000, "32", v2f32>; +def VLD1d64 : VLD1D<0b1100, "64", v1i64>; -def VLD1q8 : VLD1Q<0b0000, "vld1", "8", v16i8, int_arm_neon_vld1>; -def VLD1q16 : VLD1Q<0b0100, "vld1", "16", v8i16, int_arm_neon_vld1>; -def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>; -def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>; -def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>; +def VLD1q8 : VLD1Q<0b0000, "8", v16i8>; +def VLD1q16 : VLD1Q<0b0100, "16", v8i16>; +def VLD1q32 : VLD1Q<0b1000, "32", v4i32>; +def VLD1qf : VLD1Q<0b1000, "32", v4f32>; +def VLD1q64 : VLD1Q<0b1100, "64", v2i64>; -// These (dreg triple/quadruple) are for disassembly only. 
-class VLD1D3 op7_4, string OpcodeStr, string Dt> - : NLdSt<0, 0b10, 0b0110, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, - "\\{$dst1, $dst2, $dst3\\}, $addr", "", - [/* For disassembly only; pattern left blank */]>; -class VLD1D4 op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, - "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", - [/* For disassembly only; pattern left blank */]>; +let mayLoad = 1 in { -def VLD1d8T : VLD1D3<0b0000, "vld1", "8">; -def VLD1d16T : VLD1D3<0b0100, "vld1", "16">; -def VLD1d32T : VLD1D3<0b1000, "vld1", "32">; -//def VLD1d64T : VLD1D3<0b1100, "vld1", "64">; +// ...with address register writeback: +class VLD1DWB op7_4, string Dt> + : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", Dt, "\\{$dst\\}, $addr$offset", + "$addr.addr = $wb", []>; +class VLD1QWB op7_4, string Dt> + : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", Dt, "${dst:dregpair}, $addr$offset", + "$addr.addr = $wb", []>; -def VLD1d8Q : VLD1D4<0b0000, "vld1", "8">; -def VLD1d16Q : VLD1D4<0b0100, "vld1", "16">; -def VLD1d32Q : VLD1D4<0b1000, "vld1", "32">; -//def VLD1d64Q : VLD1D4<0b1100, "vld1", "64">; +def VLD1d8_UPD : VLD1DWB<0b0000, "8">; +def VLD1d16_UPD : VLD1DWB<0b0100, "16">; +def VLD1d32_UPD : VLD1DWB<0b1000, "32">; +def VLD1d64_UPD : VLD1DWB<0b1100, "64">; +def VLD1q8_UPD : VLD1QWB<0b0000, "8">; +def VLD1q16_UPD : VLD1QWB<0b0100, "16">; +def VLD1q32_UPD : VLD1QWB<0b1000, "32">; +def VLD1q64_UPD : VLD1QWB<0b1100, "64">; +} // mayLoad = 1 let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +// These (dreg triple/quadruple) are for disassembly only. 
+class VLD1D3 op7_4, string Dt> + : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; +class VLD1D4 op7_4, string Dt> + : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; + +def VLD1d8T : VLD1D3<0b0000, "8">; +def VLD1d16T : VLD1D3<0b0100, "16">; +def VLD1d32T : VLD1D3<0b1000, "32">; +// VLD1d64T : implemented as VLD3d64 + +def VLD1d8Q : VLD1D4<0b0000, "8">; +def VLD1d16Q : VLD1D4<0b0100, "16">; +def VLD1d32Q : VLD1D4<0b1000, "32">; +// VLD1d64Q : implemented as VLD4d64 + +// ...with address register writeback: +class VLD1D3WB op7_4, string Dt> + : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +class VLD1D4WB op7_4, string Dt> + : NLdSt<0,0b10,0b0010,op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; + +def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">; +def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">; +def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">; +// VLD1d64T_UPD : implemented as VLD3d64_UPD + +def VLD1d8Q_UPD : VLD1D4WB<0b0000, "8">; +def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">; +def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">; +// VLD1d64Q_UPD : implemented as VLD4d64_UPD + // VLD2 : Vector Load (multiple 2-element structures) -class VLD2D op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2), +class VLD2D op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; -class VLD2Q op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0011,op7_4, + "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; +class VLD2Q op7_4, string Dt> + : NLdSt<0, 0b10, 0b0011, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", - "", []>; + "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; -def VLD2d8 : VLD2D<0b0000, "vld2", "8">; -def VLD2d16 : VLD2D<0b0100, "vld2", "16">; -def VLD2d32 : VLD2D<0b1000, "vld2", "32">; +def VLD2d8 : VLD2D<0b1000, 0b0000, "8">; +def VLD2d16 : VLD2D<0b1000, 0b0100, "16">; +def VLD2d32 : VLD2D<0b1000, 0b1000, "32">; def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "\\{$dst1, $dst2\\}, $addr", "", []>; -def VLD2q8 : VLD2Q<0b0000, "vld2", "8">; -def VLD2q16 : VLD2Q<0b0100, "vld2", "16">; -def VLD2q32 : VLD2Q<0b1000, "vld2", "32">; +def VLD2q8 : VLD2Q<0b0000, "8">; +def VLD2q16 : VLD2Q<0b0100, "16">; +def VLD2q32 : VLD2Q<0b1000, "32">; -// These (double-spaced dreg pair) are for disassembly only. 
-class VLD2Ddbl op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b1001,op7_4, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; - -def VLD2d8D : VLD2Ddbl<0b0000, "vld2", "8">; -def VLD2d16D : VLD2Ddbl<0b0100, "vld2", "16">; -def VLD2d32D : VLD2Ddbl<0b1000, "vld2", "32">; - -// VLD3 : Vector Load (multiple 3-element structures) -class VLD3D op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr), IIC_VLD3, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; -class VLD3WB op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD3, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", +// ...with address register writeback: +class VLD2DWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, + "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset", + "$addr.addr = $wb", []>; +class VLD2QWB op7_4, string Dt> + : NLdSt<0, 0b10, 0b0011, op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, + "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb", []>; -def VLD3d8 : VLD3D<0b0000, "vld3", "8">; -def VLD3d16 : VLD3D<0b0100, "vld3", "16">; -def VLD3d32 : VLD3D<0b1000, "vld3", "32">; +def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">; +def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">; +def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">; +def VLD2d64_UPD : NLdSt<0,0b10,0b1010,0b1100, + (outs DPR:$dst1, DPR:$dst2, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", "\\{$dst1, $dst2\\}, $addr$offset", + "$addr.addr = $wb", []>; + +def VLD2q8_UPD : VLD2QWB<0b0000, "8">; +def VLD2q16_UPD : VLD2QWB<0b0100, "16">; +def VLD2q32_UPD : VLD2QWB<0b1000, "32">; + +// ...with double-spaced registers (for disassembly only): +def VLD2b8 : VLD2D<0b1001, 0b0000, "8">; +def VLD2b16 : VLD2D<0b1001, 0b0100, "16">; +def VLD2b32 : VLD2D<0b1001, 0b1000, "32">; +def VLD2b8_UPD : VLD2DWB<0b1001, 0b0000, "8">; +def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">; +def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">; + +// VLD3 : Vector Load (multiple 3-element structures) +class VLD3D op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr), IIC_VLD3, + "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; + +def VLD3d8 : VLD3D<0b0100, 0b0000, "8">; +def VLD3d16 : VLD3D<0b0100, 0b0100, "16">; +def VLD3d32 : VLD3D<0b0100, 0b1000, "32">; def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; -// vld3 to double-spaced even registers. -def VLD3q8a : VLD3WB<0b0000, "vld3", "8">; -def VLD3q16a : VLD3WB<0b0100, "vld3", "16">; -def VLD3q32a : VLD3WB<0b1000, "vld3", "32">; - -// vld3 to double-spaced odd registers. 
-def VLD3q8b : VLD3WB<0b0000, "vld3", "8">; -def VLD3q16b : VLD3WB<0b0100, "vld3", "16">; -def VLD3q32b : VLD3WB<0b1000, "vld3", "32">; - -// VLD4 : Vector Load (multiple 4-element structures) -class VLD4D op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0000,op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr), IIC_VLD4, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", - "", []>; -class VLD4WB op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0001,op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD4, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", +// ...with address register writeback: +class VLD3DWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3, + "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>; -def VLD4d8 : VLD4D<0b0000, "vld4", "8">; -def VLD4d16 : VLD4D<0b0100, "vld4", "16">; -def VLD4d32 : VLD4D<0b1000, "vld4", "32">; +def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; +def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; +def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; +def VLD3d64_UPD : NLdSt<0,0b10,0b0110,0b1100, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VLD3q8 : VLD3D<0b0101, 0b0000, "8">; +def VLD3q16 : VLD3D<0b0101, 0b0100, "16">; +def VLD3q32 : VLD3D<0b0101, 0b1000, "32">; +def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">; +def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">; +def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">; +def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">; +def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">; + +// VLD4 : Vector Load (multiple 4-element structures) +class VLD4D op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr), IIC_VLD4, + "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; + +def VLD4d8 : VLD4D<0b0000, 0b0000, "8">; +def VLD4d16 : VLD4D<0b0000, 0b0100, "16">; +def VLD4d32 : VLD4D<0b0000, 0b1000, "32">; def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; -// vld4 to double-spaced even registers. -def VLD4q8a : VLD4WB<0b0000, "vld4", "8">; -def VLD4q16a : VLD4WB<0b0100, "vld4", "16">; -def VLD4q32a : VLD4WB<0b1000, "vld4", "32">; +// ...with address register writeback: +class VLD4DWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4, + "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vld4 to double-spaced odd registers. 
-def VLD4q8b : VLD4WB<0b0000, "vld4", "8">; -def VLD4q16b : VLD4WB<0b0100, "vld4", "16">; -def VLD4q32b : VLD4WB<0b1000, "vld4", "32">; +def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">; +def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">; +def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">; +def VLD4d64_UPD : NLdSt<0,0b10,0b0010,0b1100, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, + GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VLD4q8 : VLD4D<0b0001, 0b0000, "8">; +def VLD4q16 : VLD4D<0b0001, 0b0100, "16">; +def VLD4q32 : VLD4D<0b0001, 0b1000, "32">; +def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">; +def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">; +def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">; +def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">; +def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">; // VLD1LN : Vector Load (single element to one lane) // FIXME: Not yet implemented. // VLD2LN : Vector Load (single 2-element structure to one lane) -class VLD2LN op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VLD2, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2", []>; +class VLD2LN op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2", []>; -// vld2 to single-spaced registers. -def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">; -def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 0; } -def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 0; } +def VLD2LNd8 : VLD2LN<0b0001, "8">; +def VLD2LNd16 : VLD2LN<0b0101, "16"> { let Inst{5} = 0; } +def VLD2LNd32 : VLD2LN<0b1001, "32"> { let Inst{6} = 0; } -// vld2 to double-spaced even registers. -def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } -def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } +// ...with double-spaced registers: +def VLD2LNq16 : VLD2LN<0b0101, "16"> { let Inst{5} = 1; } +def VLD2LNq32 : VLD2LN<0b1001, "32"> { let Inst{6} = 1; } -// vld2 to double-spaced odd registers. 
-def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } -def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } +// ...alternate versions to be allocated odd register numbers: +def VLD2LNq16odd : VLD2LN<0b0101, "16"> { let Inst{5} = 1; } +def VLD2LNq32odd : VLD2LN<0b1001, "32"> { let Inst{6} = 1; } + +// ...with address register writeback: +class VLD2LNWB op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt, + "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset", + "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>; + +def VLD2LNd8_UPD : VLD2LNWB<0b0001, "8">; +def VLD2LNd16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 0; } +def VLD2LNd32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 0; } + +def VLD2LNq16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 1; } +def VLD2LNq32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 1; } // VLD3LN : Vector Load (single 3-element structure to one lane) -class VLD3LN op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VLD3, OpcodeStr, Dt, - "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; +class VLD3LN op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VLD3, "vld3", Dt, + "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; -// vld3 to single-spaced registers. -def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { let Inst{4} = 0; } -def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b00; } -def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b000; } +def VLD3LNd8 : VLD3LN<0b0010, "8"> { let Inst{4} = 0; } +def VLD3LNd16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VLD3LNd32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b000; } -// vld3 to double-spaced even registers. -def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } -def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } +// ...with double-spaced registers: +def VLD3LNq16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } -// vld3 to double-spaced odd registers. 
-def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } -def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } +// ...alternate versions to be allocated odd register numbers: +def VLD3LNq16odd : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32odd : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...with address register writeback: +class VLD3LNWB op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), + IIC_VLD3, "vld3", Dt, + "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset", + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb", + []>; + +def VLD3LNd8_UPD : VLD3LNWB<0b0010, "8"> { let Inst{4} = 0; } +def VLD3LNd16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VLD3LNd32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; } + +def VLD3LNq16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; } // VLD4LN : Vector Load (single 4-element structure to one lane) -class VLD4LN op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b10,op11_8,{?,?,?,?}, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VLD4, OpcodeStr, Dt, +class VLD4LN op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VLD4, "vld4", Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; -// vld4 to single-spaced registers. -def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">; -def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 0; } -def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 0; } +def VLD4LNd8 : VLD4LN<0b0011, "8">; +def VLD4LNd16 : VLD4LN<0b0111, "16"> { let Inst{5} = 0; } +def VLD4LNd32 : VLD4LN<0b1011, "32"> { let Inst{6} = 0; } -// vld4 to double-spaced even registers. -def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } -def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } +// ...with double-spaced registers: +def VLD4LNq16 : VLD4LN<0b0111, "16"> { let Inst{5} = 1; } +def VLD4LNq32 : VLD4LN<0b1011, "32"> { let Inst{6} = 1; } -// vld4 to double-spaced odd registers. 
-def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } -def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } +// ...alternate versions to be allocated odd register numbers: +def VLD4LNq16odd : VLD4LN<0b0111, "16"> { let Inst{5} = 1; } +def VLD4LNq32odd : VLD4LN<0b1011, "32"> { let Inst{6} = 1; } + +// ...with address register writeback: +class VLD4LNWB op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), + IIC_VLD4, "vld4", Dt, +"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset", +"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb", + []>; + +def VLD4LNd8_UPD : VLD4LNWB<0b0011, "8">; +def VLD4LNd16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 0; } +def VLD4LNd32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 0; } + +def VLD4LNq16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 1; } +def VLD4LNq32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 1; } // VLD1DUP : Vector Load (single element to all lanes) // VLD2DUP : Vector Load (single 2-element structure to all lanes) @@ -355,213 +501,353 @@ def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } } // mayLoad = 1, hasExtraDefRegAllocReq = 1 // VST1 : Vector Store (multiple single elements) -class VST1D op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> +class VST1D op7_4, string Dt, ValueType Ty> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, - OpcodeStr, Dt, "\\{$src\\}, $addr", "", - [(IntOp addrmode6:$addr, (Ty DPR:$src))]>; -class VST1Q op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> + "vst1", Dt, "\\{$src\\}, $addr", "", + [(int_arm_neon_vst1 addrmode6:$addr, (Ty DPR:$src))]>; +class VST1Q op7_4, string Dt, ValueType Ty> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, - OpcodeStr, Dt, "${src:dregpair}, $addr", "", - [(IntOp addrmode6:$addr, (Ty QPR:$src))]>; + "vst1", Dt, "${src:dregpair}, $addr", "", + [(int_arm_neon_vst1 addrmode6:$addr, (Ty QPR:$src))]>; let hasExtraSrcRegAllocReq = 1 in { -def VST1d8 : VST1D<0b0000, "vst1", "8", v8i8, int_arm_neon_vst1>; -def VST1d16 : VST1D<0b0100, "vst1", "16", v4i16, int_arm_neon_vst1>; -def VST1d32 : VST1D<0b1000, "vst1", "32", v2i32, int_arm_neon_vst1>; -def VST1df : VST1D<0b1000, "vst1", "32", v2f32, int_arm_neon_vst1>; -def VST1d64 : VST1D<0b1100, "vst1", "64", v1i64, int_arm_neon_vst1>; +def VST1d8 : VST1D<0b0000, "8", v8i8>; +def VST1d16 : VST1D<0b0100, "16", v4i16>; +def VST1d32 : VST1D<0b1000, "32", v2i32>; +def VST1df : VST1D<0b1000, "32", v2f32>; +def VST1d64 : VST1D<0b1100, "64", v1i64>; -def VST1q8 : VST1Q<0b0000, "vst1", "8", v16i8, int_arm_neon_vst1>; -def VST1q16 : VST1Q<0b0100, "vst1", "16", v8i16, int_arm_neon_vst1>; -def VST1q32 : VST1Q<0b1000, "vst1", "32", v4i32, int_arm_neon_vst1>; -def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>; -def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>; +def VST1q8 : VST1Q<0b0000, "8", v16i8>; +def VST1q16 : VST1Q<0b0100, "16", v8i16>; +def VST1q32 : VST1Q<0b1000, "32", v4i32>; +def VST1qf : VST1Q<0b1000, "32", v4f32>; +def VST1q64 : VST1Q<0b1100, "64", v2i64>; } // hasExtraSrcRegAllocReq -// These (dreg triple/quadruple) are for disassembly only. 
-class VST1D3 op7_4, string OpcodeStr, string Dt> - : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, Dt, - "\\{$src1, $src2, $src3\\}, $addr", "", - [/* For disassembly only; pattern left blank */]>; -class VST1D4 op7_4, string OpcodeStr, string Dt> - : NLdSt<0, 0b00, 0b0010, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, - "\\{$src1, $src2, $src3, $src4\\}, $addr", "", - [/* For disassembly only; pattern left blank */]>; - -def VST1d8T : VST1D3<0b0000, "vst1", "8">; -def VST1d16T : VST1D3<0b0100, "vst1", "16">; -def VST1d32T : VST1D3<0b1000, "vst1", "32">; -//def VST1d64T : VST1D3<0b1100, "vst1", "64">; - -def VST1d8Q : VST1D4<0b0000, "vst1", "8">; -def VST1d16Q : VST1D4<0b0100, "vst1", "16">; -def VST1d32Q : VST1D4<0b1000, "vst1", "32">; -//def VST1d64Q : VST1D4<0b1100, "vst1", "64">; - - let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { -// VST2 : Vector Store (multiple 2-element structures) -class VST2D op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b1000,op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>; -class VST2Q op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0011,op7_4, (outs), +// ...with address register writeback: +class VST1DWB op7_4, string Dt> + : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST, + "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>; +class VST1QWB op7_4, string Dt> + : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST, + "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>; + +def VST1d8_UPD : VST1DWB<0b0000, "8">; +def VST1d16_UPD : VST1DWB<0b0100, "16">; +def VST1d32_UPD : VST1DWB<0b1000, "32">; +def VST1d64_UPD : VST1DWB<0b1100, "64">; + +def VST1q8_UPD : VST1QWB<0b0000, "8">; +def VST1q16_UPD : VST1QWB<0b0100, "16">; +def VST1q32_UPD : VST1QWB<0b1000, "32">; +def VST1q64_UPD : VST1QWB<0b1100, "64">; + +// These (dreg triple/quadruple) are for disassembly only. 
+class VST1D3 op7_4, string Dt> + : NLdSt<0, 0b00, 0b0110, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; +class VST1D4 op7_4, string Dt> + : NLdSt<0, 0b00, 0b0010, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; + +def VST1d8T : VST1D3<0b0000, "8">; +def VST1d16T : VST1D3<0b0100, "16">; +def VST1d32T : VST1D3<0b1000, "32">; +// VST1d64T : implemented as VST3d64 + +def VST1d8Q : VST1D4<0b0000, "8">; +def VST1d16Q : VST1D4<0b0100, "16">; +def VST1d32Q : VST1D4<0b1000, "32">; +// VST1d64Q : implemented as VST4d64 + +// ...with address register writeback: +class VST1D3WB op7_4, string Dt> + : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", + "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +class VST1D4WB op7_4, string Dt> + : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; + +def VST1d8T_UPD : VST1D3WB<0b0000, "8">; +def VST1d16T_UPD : VST1D3WB<0b0100, "16">; +def VST1d32T_UPD : VST1D3WB<0b1000, "32">; +// VST1d64T_UPD : implemented as VST3d64_UPD + +def VST1d8Q_UPD : VST1D4WB<0b0000, "8">; +def VST1d16Q_UPD : VST1D4WB<0b0100, "16">; +def VST1d32Q_UPD : VST1D4WB<0b1000, "32">; +// VST1d64Q_UPD : implemented as VST4d64_UPD + +// VST2 : Vector Store (multiple 2-element structures) +class VST2D op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), + IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>; +class VST2Q op7_4, string Dt> + : NLdSt<0, 0b00, 0b0011, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; -def VST2d8 : VST2D<0b0000, "vst2", "8">; -def VST2d16 : VST2D<0b0100, "vst2", "16">; -def VST2d32 : VST2D<0b1000, "vst2", "32">; +def VST2d8 : VST2D<0b1000, 0b0000, "8">; +def VST2d16 : VST2D<0b1000, 0b0100, "16">; +def VST2d32 : VST2D<0b1000, 0b1000, "32">; def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, "vst1", "64", "\\{$src1, $src2\\}, $addr", "", []>; -def VST2q8 : VST2Q<0b0000, "vst2", "8">; -def VST2q16 : VST2Q<0b0100, "vst2", "16">; -def VST2q32 : VST2Q<0b1000, "vst2", "32">; +def VST2q8 : VST2Q<0b0000, "8">; +def VST2q16 : VST2Q<0b0100, "16">; +def VST2q32 : VST2Q<0b1000, "32">; -// These (double-spaced dreg pair) are for disassembly only. 
-class VST2Ddbl op7_4, string OpcodeStr, string Dt> - : NLdSt<0, 0b00, 0b1001, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>; - -def VST2d8D : VST2Ddbl<0b0000, "vst2", "8">; -def VST2d16D : VST2Ddbl<0b0100, "vst2", "16">; -def VST2d32D : VST2Ddbl<0b1000, "vst2", "32">; - -// VST3 : Vector Store (multiple 3-element structures) -class VST3D op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0100,op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; -class VST3WB op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr", +// ...with address register writeback: +class VST2DWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2), + IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset", + "$addr.addr = $wb", []>; +class VST2QWB op7_4, string Dt> + : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", "$addr.addr = $wb", []>; -def VST3d8 : VST3D<0b0000, "vst3", "8">; -def VST3d16 : VST3D<0b0100, "vst3", "16">; -def VST3d32 : VST3D<0b1000, "vst3", "32">; +def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">; +def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">; +def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">; +def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2), IIC_VST, + "vst1", "64", "\\{$src1, $src2\\}, $addr$offset", + "$addr.addr = $wb", []>; + +def VST2q8_UPD : VST2QWB<0b0000, "8">; +def VST2q16_UPD : VST2QWB<0b0100, "16">; +def VST2q32_UPD : VST2QWB<0b1000, "32">; + +// ...with double-spaced registers (for disassembly only): +def VST2b8 : VST2D<0b1001, 0b0000, "8">; +def VST2b16 : VST2D<0b1001, 0b0100, "16">; +def VST2b32 : VST2D<0b1001, 0b1000, "32">; +def VST2b8_UPD : VST2DWB<0b1001, 0b0000, "8">; +def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">; +def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">; + +// VST3 : Vector Store (multiple 3-element structures) +class VST3D op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; + +def VST3d8 : VST3D<0b0100, 0b0000, "8">; +def VST3d16 : VST3D<0b0100, 0b0100, "16">; +def VST3d32 : VST3D<0b0100, 0b1000, "32">; def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr", "", []>; -// vst3 to double-spaced even registers. -def VST3q8a : VST3WB<0b0000, "vst3", "8">; -def VST3q16a : VST3WB<0b0100, "vst3", "16">; -def VST3q32a : VST3WB<0b1000, "vst3", "32">; - -// vst3 to double-spaced odd registers. 
-def VST3q8b : VST3WB<0b0000, "vst3", "8">; -def VST3q16b : VST3WB<0b0100, "vst3", "16">; -def VST3q32b : VST3WB<0b1000, "vst3", "32">; - -// VST4 : Vector Store (multiple 4-element structures) -class VST4D op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0000,op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", - "", []>; -class VST4WB op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", +// ...with address register writeback: +class VST3DWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", "$addr.addr = $wb", []>; -def VST4d8 : VST4D<0b0000, "vst4", "8">; -def VST4d16 : VST4D<0b0100, "vst4", "16">; -def VST4d32 : VST4D<0b1000, "vst4", "32">; +def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">; +def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">; +def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">; +def VST3d64_UPD : NLdSt<0,0b00,0b0110,0b1100, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VST3q8 : VST3D<0b0101, 0b0000, "8">; +def VST3q16 : VST3D<0b0101, 0b0100, "16">; +def VST3q32 : VST3D<0b0101, 0b1000, "32">; +def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">; +def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">; +def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">; +def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">; +def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">; + +// VST4 : Vector Store (multiple 4-element structures) +class VST4D op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + "", []>; + +def VST4d8 : VST4D<0b0000, 0b0000, "8">; +def VST4d16 : VST4D<0b0000, 0b0100, "16">; +def VST4d32 : VST4D<0b0000, 0b1000, "32">; def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, "vst1", "64", "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; -// vst4 to double-spaced even registers. -def VST4q8a : VST4WB<0b0000, "vst4", "8">; -def VST4q16a : VST4WB<0b0100, "vst4", "16">; -def VST4q32a : VST4WB<0b1000, "vst4", "32">; +// ...with address register writeback: +class VST4DWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, + "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst4 to double-spaced odd registers. 
-def VST4q8b : VST4WB<0b0000, "vst4", "8">; -def VST4q16b : VST4WB<0b0100, "vst4", "16">; -def VST4q32b : VST4WB<0b1000, "vst4", "32">; +def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">; +def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">; +def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">; +def VST4d64_UPD : NLdSt<0,0b00,0b0010,0b1100, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, + "vst1", "64", + "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VST4q8 : VST4D<0b0001, 0b0000, "8">; +def VST4q16 : VST4D<0b0001, 0b0100, "16">; +def VST4q32 : VST4D<0b0001, 0b1000, "32">; +def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">; +def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">; +def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">; +def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">; +def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">; // VST1LN : Vector Store (single element from one lane) // FIXME: Not yet implemented. // VST2LN : Vector Store (single 2-element structure from one lane) -class VST2LN op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), +class VST2LN op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", + IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", "", []>; -// vst2 to single-spaced registers. -def VST2LNd8 : VST2LN<0b0001, "vst2", "8">; -def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 0; } -def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 0; } +def VST2LNd8 : VST2LN<0b0001, "8">; +def VST2LNd16 : VST2LN<0b0101, "16"> { let Inst{5} = 0; } +def VST2LNd32 : VST2LN<0b1001, "32"> { let Inst{6} = 0; } -// vst2 to double-spaced even registers. -def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } -def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } +// ...with double-spaced registers: +def VST2LNq16 : VST2LN<0b0101, "16"> { let Inst{5} = 1; } +def VST2LNq32 : VST2LN<0b1001, "32"> { let Inst{6} = 1; } -// vst2 to double-spaced odd registers. 
-def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } -def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } +// ...alternate versions to be allocated odd register numbers: +def VST2LNq16odd : VST2LN<0b0101, "16"> { let Inst{5} = 1; } +def VST2LNq32odd : VST2LN<0b1001, "32"> { let Inst{6} = 1; } + +// ...with address register writeback: +class VST2LNWB op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt, + "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", + "$addr.addr = $wb", []>; + +def VST2LNd8_UPD : VST2LNWB<0b0001, "8">; +def VST2LNd16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 0; } +def VST2LNd32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 0; } + +def VST2LNq16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 1; } +def VST2LNq32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 1; } // VST3LN : Vector Store (single 3-element structure from one lane) -class VST3LN op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), +class VST3LN op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, + nohash_imm:$lane), IIC_VST, "vst3", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>; -// vst3 to single-spaced registers. -def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { let Inst{4} = 0; } -def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b00; } -def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b000; } +def VST3LNd8 : VST3LN<0b0010, "8"> { let Inst{4} = 0; } +def VST3LNd16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VST3LNd32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b000; } -// vst3 to double-spaced even registers. -def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } -def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } +// ...with double-spaced registers: +def VST3LNq16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } -// vst3 to double-spaced odd registers. 
-def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } -def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } +// ...alternate versions to be allocated odd register numbers: +def VST3LNq16odd : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32odd : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...with address register writeback: +class VST3LNWB op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), + IIC_VST, "vst3", Dt, + "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset", + "$addr.addr = $wb", []>; + +def VST3LNd8_UPD : VST3LNWB<0b0010, "8"> { let Inst{4} = 0; } +def VST3LNd16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VST3LNd32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; } + +def VST3LNq16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; } // VST4LN : Vector Store (single 4-element structure from one lane) -class VST4LN op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), +class VST4LN op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, + nohash_imm:$lane), IIC_VST, "vst4", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr", "", []>; -// vst4 to single-spaced registers. -def VST4LNd8 : VST4LN<0b0011, "vst4", "8">; -def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 0; } -def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 0; } +def VST4LNd8 : VST4LN<0b0011, "8">; +def VST4LNd16 : VST4LN<0b0111, "16"> { let Inst{5} = 0; } +def VST4LNd32 : VST4LN<0b1011, "32"> { let Inst{6} = 0; } -// vst4 to double-spaced even registers. -def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } -def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } +// ...with double-spaced registers: +def VST4LNq16 : VST4LN<0b0111, "16"> { let Inst{5} = 1; } +def VST4LNq32 : VST4LN<0b1011, "32"> { let Inst{6} = 1; } -// vst4 to double-spaced odd registers. 
-def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } -def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } +// ...alternate versions to be allocated odd register numbers: +def VST4LNq16odd : VST4LN<0b0111, "16"> { let Inst{5} = 1; } +def VST4LNq32odd : VST4LN<0b1011, "32"> { let Inst{6} = 1; } + +// ...with address register writeback: +class VST4LNWB op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), + IIC_VST, "vst4", Dt, + "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset", + "$addr.addr = $wb", []>; + +def VST4LNd8_UPD : VST4LNWB<0b0011, "8">; +def VST4LNd16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 0; } +def VST4LNd32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 0; } + +def VST4LNq16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 1; } +def VST4LNq32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 1; } } // mayStore = 1, hasExtraSrcRegAllocReq = 1 diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 37c9fc5f734e..e3ca5369feec 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -16,7 +16,8 @@ // def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def imm_neg_XFORM : SDNodeXFormgetTargetConstant(-(int)N->getZExtValue(), MVT::i32); @@ -549,7 +550,7 @@ def tLDM : T1I<(outs), def tLDM_UPD : T1It<(outs tGPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, - "ldm${addr:submode}${p}\t$addr, $dsts", + "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>, T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53 } // mayLoad, hasExtraDefRegAllocReq @@ -558,7 +559,7 @@ let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def tSTM_UPD : T1It<(outs tGPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, - "stm${addr:submode}${p}\t$addr, $srcs", + "stm${addr:submode}${p}\t$addr!, $srcs", "$addr.addr = $wb", []>, T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189 diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index ab9e926099c2..262aae48913a 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1218,7 +1218,7 @@ def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, - "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", + "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts", "$addr.addr = $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; @@ -1244,7 +1244,7 @@ def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, - "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", + "stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs", "$addr.addr = $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 4d1d48a11341..aca823022126 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -25,8 +25,6 @@ def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; -def 
arm_f16tof32 : SDNode<"ARMISD::F16_TO_F32", SDT_ITOF>; -def arm_f32tof16 : SDNode<"ARMISD::F32_TO_F16", SDT_FTOI>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>; def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>; @@ -94,7 +92,7 @@ def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", + "vldm${addr:submode}${p}\t${addr:base}!, $dsts", "$addr.base = $wb", []> { let Inst{20} = 1; } @@ -102,7 +100,7 @@ def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", + "vldm${addr:submode}${p}\t${addr:base}!, $dsts", "$addr.base = $wb", []> { let Inst{20} = 1; } @@ -124,7 +122,7 @@ def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", + "vstm${addr:submode}${p}\t${addr:base}!, $srcs", "$addr.base = $wb", []> { let Inst{20} = 0; } @@ -132,7 +130,7 @@ def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", + "vstm${addr:submode}${p}\t${addr:base}!, $srcs", "$addr.base = $wb", []> { let Inst{20} = 0; } @@ -259,11 +257,17 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a", - [(set SPR:$dst, (f32 (arm_f32tof16 SPR:$a)))]>; + [/* For disassembly only; pattern left blank */]>; + +def : ARMPat<(f32_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a", - [(set SPR:$dst, (arm_f16tof32 SPR:$a))]>; + [/* For disassembly only; pattern left blank */]>; + +def : ARMPat<(f16_to_f32 GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a", diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 8fbcf45dfd63..bdbec30d8596 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -253,7 +253,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg) : BuildMI(MBB, MBBI, dl, TII->get(Opcode)) .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs)) + .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? 
NumRegs<<1 : NumRegs)) .addImm(Pred).addReg(PredReg); for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) @@ -505,11 +505,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, if (MI->getOperand(i).getReg() == Base) return false; } - assert(!ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm())); Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); } else { // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. - assert(!ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())); Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); } @@ -573,11 +571,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, .addReg(Base, getKillRegState(BaseKill)); if (isAM4) { // [t2]LDM_UPD, [t2]STM_UPD - MIB.addImm(ARM_AM::getAM4ModeImm(Mode, true)) + MIB.addImm(ARM_AM::getAM4ModeImm(Mode)) .addImm(Pred).addReg(PredReg); } else { // VLDM[SD}_UPD, VSTM[SD]_UPD - MIB.addImm(ARM_AM::getAM5Opc(Mode, true, Offset)) + MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset)) .addImm(Pred).addReg(PredReg); } // Transfer the rest of operands. @@ -709,7 +707,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, unsigned Offset = 0; if (isAM5) Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia, - true, (isDPR ? 2 : 1)); + (isDPR ? 2 : 1)); else if (isAM2) Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); else @@ -1157,19 +1155,24 @@ namespace { }; } -/// MergeReturnIntoLDM - If this is a exit BB, try merging the return op -/// (bx lr) into the preceeding stack restore so it directly restore the value -/// of LR into pc. -/// ldmfd sp!, {r7, lr} +/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops +/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it +/// directly restore the value of LR into pc. +/// ldmfd sp!, {..., lr} /// bx lr +/// or +/// ldmfd sp!, {..., lr} +/// mov pc, lr /// => -/// ldmfd sp!, {r7, pc} +/// ldmfd sp!, {..., pc} bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = prior(MBB.end()); if (MBBI != MBB.begin() && - (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) { + (MBBI->getOpcode() == ARM::BX_RET || + MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::MOVPCLR)) { MachineInstr *PrevMI = prior(MBBI); if (PrevMI->getOpcode() == ARM::LDM_UPD || PrevMI->getOpcode() == ARM::t2LDM_UPD) { diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 7233f5c8babc..95f57b7b34fa 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -21,7 +21,7 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { +static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { case Triple::Darwin: diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 88e67e3808f4..c32f16c77a24 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -23,6 +23,7 @@ #include "ARMISelLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" +#include "llvm/ADT/OwningPtr.h" namespace llvm { @@ -83,7 +84,8 @@ public: /// Thumb-1 and Thumb-2. 
/// class ThumbTargetMachine : public ARMBaseTargetMachine { - ARMBaseInstrInfo *InstrInfo; // either Thumb1InstrInfo or Thumb2InstrInfo + // Either Thumb1InstrInfo or Thumb2InstrInfo. + OwningPtr InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; public: @@ -100,7 +102,9 @@ public: } /// returns either Thumb1InstrInfo or Thumb2InstrInfo - virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo; } + virtual const ARMBaseInstrInfo *getInstrInfo() const { + return InstrInfo.get(); + } virtual const TargetData *getTargetData() const { return &DataLayout; } }; diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 4db14a334ab9..4a7a1e43bd36 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -101,6 +101,7 @@ namespace { void printAddrMode5Operand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); void printAddrMode6Operand(const MachineInstr *MI, int OpNum); + void printAddrMode6OffsetOperand(const MachineInstr *MI, int OpNum); void printAddrModePCOperand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum); @@ -431,16 +432,16 @@ void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) { O << "[" << getRegisterName(MO1.getReg()); if (!MO2.getReg()) { - if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. O << ", #" - << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) << ARM_AM::getAM2Offset(MO3.getImm()); O << "]"; return; } O << ", " - << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) << getRegisterName(MO2.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) @@ -458,12 +459,12 @@ void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){ unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); O << "#" - << (char)ARM_AM::getAM2Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs; return; } - O << (char)ARM_AM::getAM2Op(MO2.getImm()) + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << getRegisterName(MO1.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) @@ -490,7 +491,7 @@ void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) { if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) O << ", #" - << (char)ARM_AM::getAM3Op(MO3.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) << ImmOffs; O << "]"; } @@ -508,35 +509,22 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); O << "#" - << (char)ARM_AM::getAM3Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs; } void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op, const char *Modifier) { - const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Modifier && strcmp(Modifier, "submode") == 0) { - if (MO1.getReg() == ARM::SP) { - // FIXME - bool isLDM = (MI->getOpcode() == ARM::LDM || - MI->getOpcode() == ARM::LDM_UPD || 
- MI->getOpcode() == ARM::LDM_RET || - MI->getOpcode() == ARM::t2LDM || - MI->getOpcode() == ARM::t2LDM_UPD || - MI->getOpcode() == ARM::t2LDM_RET); - O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); - } else - O << ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); } else if (Modifier && strcmp(Modifier, "wide") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Mode == ARM_AM::ia) O << ".w"; } else { printOperand(MI, Op); - if (ARM_AM::getAM4WBFlag(MO2.getImm())) - O << "!"; } } @@ -559,8 +547,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. O << getRegisterName(MO1.getReg()); - if (ARM_AM::getAM5WBFlag(MO2.getImm())) - O << "!"; return; } @@ -568,7 +554,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { O << ", #" - << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) << ImmOffs*4; } O << "]"; @@ -577,22 +563,21 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) { const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); - const MachineOperand &MO3 = MI->getOperand(Op+2); - const MachineOperand &MO4 = MI->getOperand(Op+3); O << "[" << getRegisterName(MO1.getReg()); - if (MO4.getImm()) { + if (MO2.getImm()) { // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << MO4.getImm(); + O << ", :" << MO2.getImm(); } O << "]"; +} - if (ARM_AM::getAM6WBFlag(MO3.getImm())) { - if (MO2.getReg() == 0) - O << "!"; - else - O << ", " << getRegisterName(MO2.getReg()); - } +void ARMAsmPrinter::printAddrMode6OffsetOperand(const MachineInstr *MI, int Op){ + const MachineOperand &MO = MI->getOperand(Op); + if (MO.getReg() == 0) + O << "!"; + else + O << ", " << getRegisterName(MO.getReg()); } void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, @@ -604,7 +589,7 @@ void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, const MachineOperand &MO1 = MI->getOperand(Op); assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); - O << "[pc, +" << getRegisterName(MO1.getReg()) << "]"; + O << "[pc, " << getRegisterName(MO1.getReg()) << "]"; } void @@ -627,10 +612,11 @@ void ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op) { // (3 - the number of trailing zeros) is the number of then / else. 
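A note on the IT-mask handling this patch introduces in both printThumbITMask implementations (here and, further down, in ARMInstPrinter), together with the Thumb2ITBlockPass hunk later in the patch: the mask operand now carries firstcond[0] in bit 4, the low condition bit of each following instruction in bits 3..1, and a trailing 1 as terminator. A standalone sketch of the decode, using a hypothetical operand value that would correspond to an ITTE block whose first condition is NE:

    // Standalone sketch (not part of the patch): decode an IT-block mask the way
    // the rewritten printThumbITMask does. The operand value is hypothetical;
    // 0x1A (0b11010) would encode an ITTE block with first condition NE under the
    // encoding produced by the Thumb2ITBlockPass change (firstcond[0] in bit 4,
    // one condition bit per slot in bits 3..1, trailing 1 as terminator).
    #include <cstdio>

    static unsigned countTrailingZeros(unsigned V) {
      unsigned N = 0;
      while (V && !(V & 1)) { V >>= 1; ++N; }
      return N;
    }

    int main() {
      unsigned Mask = 0x1A;                 // hypothetical example value
      unsigned CondBit0 = (Mask >> 4) & 1;  // low bit of firstcond (NE -> 1)
      unsigned NumTZ = countTrailingZeros(Mask);
      for (unsigned Pos = 3; Pos > NumTZ; --Pos)
        putchar(((Mask >> Pos) & 1) == CondBit0 ? 't' : 'e');
      putchar('\n');                        // prints "te": the block is ITTE
      return 0;
    }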
unsigned Mask = MI->getOperand(Op).getImm(); + unsigned CondBit0 = Mask >> 4 & 1; unsigned NumTZ = CountTrailingZeros_32(Mask); assert(NumTZ <= 3 && "Invalid IT mask!"); for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { - bool T = (Mask & (1 << Pos)) == 0; + bool T = ((Mask >> Pos) & 1) == CondBit0; if (T) O << 't'; else @@ -662,7 +648,7 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, if (MO3.getReg()) O << ", " << getRegisterName(MO3.getReg()); else if (unsigned ImmOffs = MO2.getImm()) - O << ", #+" << ImmOffs * Scale; + O << ", #" << ImmOffs * Scale; O << "]"; } @@ -684,7 +670,7 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { const MachineOperand &MO2 = MI->getOperand(Op+1); O << "[" << getRegisterName(MO1.getReg()); if (unsigned ImmOffs = MO2.getImm()) - O << ", #+" << ImmOffs*4; + O << ", #" << ImmOffs*4; O << "]"; } @@ -720,7 +706,7 @@ void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI, unsigned OffImm = MO2.getImm(); if (OffImm) // Don't print +0. - O << ", #+" << OffImm; + O << ", #" << OffImm; O << "]"; } @@ -736,7 +722,7 @@ void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI, if (OffImm < 0) O << ", #-" << -OffImm; else if (OffImm > 0) - O << ", #+" << OffImm; + O << ", #" << OffImm; O << "]"; } @@ -752,7 +738,7 @@ void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI, if (OffImm < 0) O << ", #-" << -OffImm * 4; else if (OffImm > 0) - O << ", #+" << OffImm * 4; + O << ", #" << OffImm * 4; O << "]"; } @@ -764,7 +750,7 @@ void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, if (OffImm < 0) O << "#-" << -OffImm; else if (OffImm > 0) - O << "#+" << OffImm; + O << "#" << OffImm; } void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI, diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index a2084b0c4ab1..30763a9952e4 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -28,7 +28,159 @@ using namespace llvm; #undef MachineInstr #undef ARMAsmPrinter -void ARMInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +static unsigned NextReg(unsigned Reg) { + switch (Reg) { + default: + assert(0 && "Unexpected register enum"); + + case ARM::D0: + return ARM::D1; + case ARM::D1: + return ARM::D2; + case ARM::D2: + return ARM::D3; + case ARM::D3: + return ARM::D4; + case ARM::D4: + return ARM::D5; + case ARM::D5: + return ARM::D6; + case ARM::D6: + return ARM::D7; + case ARM::D7: + return ARM::D8; + case ARM::D8: + return ARM::D9; + case ARM::D9: + return ARM::D10; + case ARM::D10: + return ARM::D11; + case ARM::D11: + return ARM::D12; + case ARM::D12: + return ARM::D13; + case ARM::D13: + return ARM::D14; + case ARM::D14: + return ARM::D15; + case ARM::D15: + return ARM::D16; + case ARM::D16: + return ARM::D17; + case ARM::D17: + return ARM::D18; + case ARM::D18: + return ARM::D19; + case ARM::D19: + return ARM::D20; + case ARM::D20: + return ARM::D21; + case ARM::D21: + return ARM::D22; + case ARM::D22: + return ARM::D23; + case ARM::D23: + return ARM::D24; + case ARM::D24: + return ARM::D25; + case ARM::D25: + return ARM::D26; + case ARM::D26: + return ARM::D27; + case ARM::D27: + return ARM::D28; + case ARM::D28: + return ARM::D29; + case ARM::D29: + return ARM::D30; + case ARM::D30: + return ARM::D31; + } +} + +void ARMInstPrinter::printInst(const MCInst *MI) { + // Check for MOVs and print canonical 
forms, instead. + if (MI->getOpcode() == ARM::MOVs) { + const MCOperand &Dst = MI->getOperand(0); + const MCOperand &MO1 = MI->getOperand(1); + const MCOperand &MO2 = MI->getOperand(2); + const MCOperand &MO3 = MI->getOperand(3); + + O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())); + printSBitModifierOperand(MI, 6); + printPredicateOperand(MI, 4); + + O << '\t' << getRegisterName(Dst.getReg()) + << ", " << getRegisterName(MO1.getReg()); + + if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx) + return; + + O << ", "; + + if (MO2.getReg()) { + O << getRegisterName(MO2.getReg()); + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); + } else { + O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } + return; + } + + // A8.6.123 PUSH + if ((MI->getOpcode() == ARM::STM_UPD || MI->getOpcode() == ARM::t2STM_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) { + O << '\t' << "push"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + // A8.6.122 POP + if ((MI->getOpcode() == ARM::LDM_UPD || MI->getOpcode() == ARM::t2LDM_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) { + O << '\t' << "pop"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + // A8.6.355 VPUSH + if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) { + O << '\t' << "vpush"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + // A8.6.354 VPOP + if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) { + O << '\t' << "vpop"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + printInstruction(MI); + } void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const char *Modifier) { @@ -36,6 +188,9 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (Op.isReg()) { unsigned Reg = Op.getReg(); if (Modifier && strcmp(Modifier, "dregpair") == 0) { + O << '{' << getRegisterName(Reg) << ", " + << getRegisterName(NextReg(Reg)) << '}'; +#if 0 // FIXME: Breaks e.g. ARM/vmul.ll. assert(0); /* @@ -44,6 +199,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << '{' << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) << '}';*/ +#endif } else if (Modifier && strcmp(Modifier, "lane") == 0) { assert(0); /* @@ -56,7 +212,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << getRegisterName(Reg); } } else if (Op.isImm()) { - assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + assert((Modifier && !strcmp(Modifier, "call")) || + ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported")); O << '#' << Op.getImm(); } else { assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); @@ -142,17 +299,17 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op) { O << "[" << getRegisterName(MO1.getReg()); if (!MO2.getReg()) { - if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. 
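For context on the recurring substitutions in these printer hunks, where (char)ARM_AM::getAMxOp(...) becomes ARM_AM::getAddrOpcStr(...) and "#+" becomes "#": the sign is now emitted only for subtracted offsets, so positive immediates print as plain #imm. A minimal sketch of the behaviour, assuming a helper with the same contract as getAddrOpcStr (empty string for add, "-" for sub):

    // Standalone sketch (not part of the patch): how offset printing changes once
    // the sign comes from a getAddrOpcStr-style helper instead of a '+'/'-' char.
    #include <cstdio>

    enum AddrOpc { add, sub };

    // Assumed to mirror ARM_AM::getAddrOpcStr: nothing for add, "-" for sub.
    static const char *getAddrOpcStr(AddrOpc Op) { return Op == sub ? "-" : ""; }

    static void printOffset(AddrOpc Op, unsigned Imm) {
      printf("#%s%u\n", getAddrOpcStr(Op), Imm);
    }

    int main() {
      printOffset(add, 4);  // "#4"  (the old code printed "#+4")
      printOffset(sub, 4);  // "#-4" (unchanged)
      return 0;
    }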
+ if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. O << ", #" - << (char)ARM_AM::getAM2Op(MO3.getImm()) - << ARM_AM::getAM2Offset(MO3.getImm()); + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << ARM_AM::getAM2Offset(MO3.getImm()); O << "]"; return; } O << ", " - << (char)ARM_AM::getAM2Op(MO3.getImm()) - << getRegisterName(MO2.getReg()); + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << getRegisterName(MO2.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) O << ", " @@ -169,11 +326,14 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); - O << '#' << (char)ARM_AM::getAM2Op(MO2.getImm()) << ImmOffs; + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) + << ImmOffs; return; } - O << (char)ARM_AM::getAM2Op(MO2.getImm()) << getRegisterName(MO1.getReg()); + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) + << getRegisterName(MO1.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) O << ", " @@ -196,8 +356,8 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum) { if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) O << ", #" - << (char)ARM_AM::getAM3Op(MO3.getImm()) - << ImmOffs; + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) + << ImmOffs; O << ']'; } @@ -214,35 +374,24 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); - O << "#" - << (char)ARM_AM::getAM3Op(MO2.getImm()) - << ImmOffs; + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) + << ImmOffs; } void ARMInstPrinter::printAddrMode4Operand(const MCInst *MI, unsigned OpNum, const char *Modifier) { - const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Modifier && strcmp(Modifier, "submode") == 0) { - if (MO1.getReg() == ARM::SP) { - // FIXME - bool isLDM = (MI->getOpcode() == ARM::LDM || - MI->getOpcode() == ARM::LDM_RET || - MI->getOpcode() == ARM::t2LDM || - MI->getOpcode() == ARM::t2LDM_RET); - O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); - } else - O << ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); } else if (Modifier && strcmp(Modifier, "wide") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Mode == ARM_AM::ia) O << ".w"; } else { printOperand(MI, OpNum); - if (ARM_AM::getAM4WBFlag(MO2.getImm())) - O << "!"; } } @@ -263,8 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. 
O << getRegisterName(MO1.getReg()); - if (ARM_AM::getAM5WBFlag(MO2.getImm())) - O << "!"; return; } @@ -272,7 +419,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { O << ", #" - << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) << ImmOffs*4; } O << "]"; @@ -281,17 +428,22 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum) { const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); - // FIXME: No support yet for specifying alignment. - O << '[' << getRegisterName(MO1.getReg()) << ']'; - - if (ARM_AM::getAM6WBFlag(MO3.getImm())) { - if (MO2.getReg() == 0) - O << '!'; - else - O << ", " << getRegisterName(MO2.getReg()); + O << "[" << getRegisterName(MO1.getReg()); + if (MO2.getImm()) { + // FIXME: Both darwin as and GNU as violate ARM docs here. + O << ", :" << MO2.getImm(); } + O << "]"; +} + +void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.getReg() == 0) + O << "!"; + else + O << ", " << getRegisterName(MO.getReg()); } void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum, @@ -311,14 +463,56 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI, void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum) { O << "{"; - // Always skip the first operand, it's the optional (and implicit writeback). - for (unsigned i = OpNum+1, e = MI->getNumOperands(); i != e; ++i) { - if (i != OpNum+1) O << ", "; + for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { + if (i != OpNum) O << ", "; O << getRegisterName(MI->getOperand(i).getReg()); } O << "}"; } +void ARMInstPrinter::printCPSOptionOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &Op = MI->getOperand(OpNum); + unsigned option = Op.getImm(); + unsigned mode = option & 31; + bool changemode = option >> 5 & 1; + unsigned AIF = option >> 6 & 7; + unsigned imod = option >> 9 & 3; + if (imod == 2) + O << "ie"; + else if (imod == 3) + O << "id"; + O << '\t'; + if (imod > 1) { + if (AIF & 4) O << 'a'; + if (AIF & 2) O << 'i'; + if (AIF & 1) O << 'f'; + if (AIF > 0 && changemode) O << ", "; + } + if (changemode) + O << '#' << mode; +} + +void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &Op = MI->getOperand(OpNum); + unsigned Mask = Op.getImm(); + if (Mask) { + O << '_'; + if (Mask & 8) O << 'f'; + if (Mask & 4) O << 's'; + if (Mask & 2) O << 'x'; + if (Mask & 1) O << 'c'; + } +} + +void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum){ + const MCOperand &Op = MI->getOperand(OpNum); + O << '#'; + if (Op.getImm() < 0) + O << '-' << (-Op.getImm() - 1); + else + O << Op.getImm(); +} + void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); if (CC != ARMCC::AL) @@ -360,3 +554,191 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) { void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum) { O << "#" << MI->getOperand(OpNum).getImm() * 4; } + +void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum) { + // (3 - the number of trailing zeros) is the 
number of then / else. + unsigned Mask = MI->getOperand(OpNum).getImm(); + unsigned CondBit0 = Mask >> 4 & 1; + unsigned NumTZ = CountTrailingZeros_32(Mask); + assert(NumTZ <= 3 && "Invalid IT mask!"); + for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { + bool T = ((Mask >> Pos) & 1) == CondBit0; + if (T) + O << 't'; + else + O << 'e'; + } +} + +void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op) +{ + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + O << "[" << getRegisterName(MO1.getReg()); + O << ", " << getRegisterName(MO2.getReg()) << "]"; +} + +void ARMInstPrinter::printThumbAddrModeRI5Operand(const MCInst *MI, unsigned Op, + unsigned Scale) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op); + return; + } + + O << "[" << getRegisterName(MO1.getReg()); + if (MO3.getReg()) + O << ", " << getRegisterName(MO3.getReg()); + else if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs * Scale; + O << "]"; +} + +void ARMInstPrinter::printThumbAddrModeS1Operand(const MCInst *MI, unsigned Op) +{ + printThumbAddrModeRI5Operand(MI, Op, 1); +} + +void ARMInstPrinter::printThumbAddrModeS2Operand(const MCInst *MI, unsigned Op) +{ + printThumbAddrModeRI5Operand(MI, Op, 2); +} + +void ARMInstPrinter::printThumbAddrModeS4Operand(const MCInst *MI, unsigned Op) +{ + printThumbAddrModeRI5Operand(MI, Op, 4); +} + +void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI,unsigned Op) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + O << "[" << getRegisterName(MO1.getReg()); + if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs*4; + O << "]"; +} + +void ARMInstPrinter::printTBAddrMode(const MCInst *MI, unsigned OpNum) { + O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg()); + if (MI->getOpcode() == ARM::t2TBH) + O << ", lsl #1"; + O << ']'; +} + +// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 +// register with shift forms. +// REG 0 0 - e.g. R5 +// REG IMM, SH_OPC - e.g. R5, LSL #3 +void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + unsigned Reg = MO1.getReg(); + O << getRegisterName(Reg); + + // Print the shift opc. + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())) + << " "; + + assert(MO2.isImm() && "Not a valid t2_so_reg value!"); + O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); +} + +void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + unsigned OffImm = MO2.getImm(); + if (OffImm) // Don't print +0. + O << ", #" << OffImm; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + int32_t OffImm = (int32_t)MO2.getImm(); + // Don't print +0. 
+ if (OffImm < 0) + O << ", #-" << -OffImm; + else if (OffImm > 0) + O << ", #" << OffImm; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + int32_t OffImm = (int32_t)MO2.getImm() / 4; + // Don't print +0. + if (OffImm < 0) + O << ", #-" << -OffImm * 4; + else if (OffImm > 0) + O << ", #" << OffImm * 4; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + int32_t OffImm = (int32_t)MO1.getImm(); + // Don't print +0. + if (OffImm < 0) + O << "#-" << -OffImm; + else if (OffImm > 0) + O << "#" << OffImm; +} + +void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + int32_t OffImm = (int32_t)MO1.getImm() / 4; + // Don't print +0. + if (OffImm < 0) + O << "#-" << -OffImm * 4; + else if (OffImm > 0) + O << "#" << OffImm * 4; +} + +void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); + + O << "[" << getRegisterName(MO1.getReg()); + + assert(MO2.getReg() && "Invalid so_reg load / store address!"); + O << ", " << getRegisterName(MO2.getReg()); + + unsigned ShAmt = MO3.getImm(); + if (ShAmt) { + assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); + O << ", lsl #" << ShAmt; + } + O << "]"; +} + +void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum) { + O << '#' << MI->getOperand(OpNum).getImm(); +} + +void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum) { + O << '#' << MI->getOperand(OpNum).getImm(); +} + diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index 9a3cbc3f5d04..d41b5dfa6c5b 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -48,32 +48,33 @@ public: void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, const char *Modifier = 0); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum); + void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum); void printAddrModePCOperand(const MCInst *MI, unsigned OpNum, const char *Modifier = 0); void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum); - void printThumbITMask(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum) {} + void printThumbITMask(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum); void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum, - unsigned Scale) {} - void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum) {} + unsigned Scale); + void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum); + void 
printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum); - void printT2SOOperand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {} + void printT2SOOperand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum); - void printCPSOptionOperand(const MCInst *MI, unsigned OpNum) {} - void printMSRMaskOperand(const MCInst *MI, unsigned OpNum) {} - void printNegZeroOperand(const MCInst *MI, unsigned OpNum) {} + void printCPSOptionOperand(const MCInst *MI, unsigned OpNum); + void printMSRMaskOperand(const MCInst *MI, unsigned OpNum); + void printNegZeroOperand(const MCInst *MI, unsigned OpNum); void printPredicateOperand(const MCInst *MI, unsigned OpNum); void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum); void printSBitModifierOperand(const MCInst *MI, unsigned OpNum); @@ -82,10 +83,10 @@ public: const char *Modifier); void printJTBlockOperand(const MCInst *MI, unsigned OpNum) {} void printJT2BlockOperand(const MCInst *MI, unsigned OpNum) {} - void printTBAddrMode(const MCInst *MI, unsigned OpNum) {} + void printTBAddrMode(const MCInst *MI, unsigned OpNum); void printNoHashImmediate(const MCInst *MI, unsigned OpNum); - void printVFPf32ImmOperand(const MCInst *MI, int OpNum) {} - void printVFPf64ImmOperand(const MCInst *MI, int OpNum) {} + void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum); + void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum); void printHex8ImmOperand(const MCInst *MI, int OpNum) {} void printHex16ImmOperand(const MCInst *MI, int OpNum) {} void printHex32ImmOperand(const MCInst *MI, int OpNum) {} diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index d9942c8c840a..c36fe63b0693 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -64,16 +64,16 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VLD2LNq16a: - case ARM::VLD2LNq32a: + case ARM::VLD2LNq16: + case ARM::VLD2LNq32: FirstOpnd = 0; NumRegs = 2; Offset = 0; Stride = 2; return true; - case ARM::VLD2LNq16b: - case ARM::VLD2LNq32b: + case ARM::VLD2LNq16odd: + case ARM::VLD2LNq32odd: FirstOpnd = 0; NumRegs = 2; Offset = 1; @@ -91,34 +91,34 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 3; return true; - case ARM::VLD3q8a: - case ARM::VLD3q16a: - case ARM::VLD3q32a: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: FirstOpnd = 0; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VLD3q8b: - case ARM::VLD3q16b: - case ARM::VLD3q32b: + case ARM::VLD3q8odd_UPD: + case ARM::VLD3q16odd_UPD: + case ARM::VLD3q32odd_UPD: FirstOpnd = 0; 
NumRegs = 3; Offset = 1; Stride = 2; return true; - case ARM::VLD3LNq16a: - case ARM::VLD3LNq32a: + case ARM::VLD3LNq16: + case ARM::VLD3LNq32: FirstOpnd = 0; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VLD3LNq16b: - case ARM::VLD3LNq32b: + case ARM::VLD3LNq16odd: + case ARM::VLD3LNq32odd: FirstOpnd = 0; NumRegs = 3; Offset = 1; @@ -136,34 +136,34 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VLD4q8a: - case ARM::VLD4q16a: - case ARM::VLD4q32a: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: FirstOpnd = 0; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VLD4q8b: - case ARM::VLD4q16b: - case ARM::VLD4q32b: + case ARM::VLD4q8odd_UPD: + case ARM::VLD4q16odd_UPD: + case ARM::VLD4q32odd_UPD: FirstOpnd = 0; NumRegs = 4; Offset = 1; Stride = 2; return true; - case ARM::VLD4LNq16a: - case ARM::VLD4LNq32a: + case ARM::VLD4LNq16: + case ARM::VLD4LNq32: FirstOpnd = 0; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VLD4LNq16b: - case ARM::VLD4LNq32b: + case ARM::VLD4LNq16odd: + case ARM::VLD4LNq32odd: FirstOpnd = 0; NumRegs = 4; Offset = 1; @@ -177,28 +177,28 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 2; return true; case ARM::VST2q8: case ARM::VST2q16: case ARM::VST2q32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 4; return true; - case ARM::VST2LNq16a: - case ARM::VST2LNq32a: - FirstOpnd = 4; + case ARM::VST2LNq16: + case ARM::VST2LNq32: + FirstOpnd = 2; NumRegs = 2; Offset = 0; Stride = 2; return true; - case ARM::VST2LNq16b: - case ARM::VST2LNq32b: - FirstOpnd = 4; + case ARM::VST2LNq16odd: + case ARM::VST2LNq32odd: + FirstOpnd = 2; NumRegs = 2; Offset = 1; Stride = 2; @@ -211,39 +211,39 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 3; return true; - case ARM::VST3q8a: - case ARM::VST3q16a: - case ARM::VST3q32a: - FirstOpnd = 5; + case ARM::VST3q8_UPD: + case ARM::VST3q16_UPD: + case ARM::VST3q32_UPD: + FirstOpnd = 4; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VST3q8b: - case ARM::VST3q16b: - case ARM::VST3q32b: - FirstOpnd = 5; + case ARM::VST3q8odd_UPD: + case ARM::VST3q16odd_UPD: + case ARM::VST3q32odd_UPD: + FirstOpnd = 4; NumRegs = 3; Offset = 1; Stride = 2; return true; - case ARM::VST3LNq16a: - case ARM::VST3LNq32a: - FirstOpnd = 4; + case ARM::VST3LNq16: + case ARM::VST3LNq32: + FirstOpnd = 2; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VST3LNq16b: - case ARM::VST3LNq32b: - FirstOpnd = 4; + case ARM::VST3LNq16odd: + case ARM::VST3LNq32odd: + FirstOpnd = 2; NumRegs = 3; Offset = 1; Stride = 2; @@ -256,39 +256,39 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 4; return true; - case ARM::VST4q8a: - case ARM::VST4q16a: - case ARM::VST4q32a: - FirstOpnd = 5; + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + FirstOpnd = 4; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VST4q8b: - case ARM::VST4q16b: - case ARM::VST4q32b: - FirstOpnd = 5; + case ARM::VST4q8odd_UPD: + case ARM::VST4q16odd_UPD: + case ARM::VST4q32odd_UPD: + FirstOpnd = 4; NumRegs = 
4; Offset = 1; Stride = 2; return true; - case ARM::VST4LNq16a: - case ARM::VST4LNq32a: - FirstOpnd = 4; + case ARM::VST4LNq16: + case ARM::VST4LNq32: + FirstOpnd = 2; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VST4LNq16b: - case ARM::VST4LNq32b: - FirstOpnd = 4; + case ARM::VST4LNq16odd: + case ARM::VST4LNq32odd: + FirstOpnd = 2; NumRegs = 4; Offset = 1; Stride = 2; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index f5ba155f4023..f36d4ef7567e 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -78,14 +78,16 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { DebugLoc ndl = NMI->getDebugLoc(); unsigned NPredReg = 0; ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); - if (NCC == OCC) { - Mask |= (1 << Pos); - } else if (NCC != CC) + if (NCC == CC || NCC == OCC) + Mask |= (NCC & 1) << Pos; + else break; --Pos; ++MBBI; } Mask |= (1 << Pos); + // Tag along (firstcond[0] << 4) with the mask. + Mask |= (CC & 1) << 4; MIB.addImm(Mask); Modified = true; ++NumITs; diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td index 6d82875fad2d..d98455628887 100644 --- a/lib/Target/Alpha/AlphaInstrFormats.td +++ b/lib/Target/Alpha/AlphaInstrFormats.td @@ -56,16 +56,16 @@ class MfcForm opcode, bits<16> fc, string asmstr, InstrItinClass itin> : InstAlpha { bits<5> Ra; - let OutOperandList = (ops GPRC:$RA); - let InOperandList = (ops); + let OutOperandList = (outs GPRC:$RA); + let InOperandList = (ins); let Inst{25-21} = Ra; let Inst{20-16} = 0; let Inst{15-0} = fc; } class MfcPForm opcode, bits<16> fc, string asmstr, InstrItinClass itin> : InstAlpha { - let OutOperandList = (ops); - let InOperandList = (ops); + let OutOperandList = (outs); + let InOperandList = (ins); let Inst{25-21} = 0; let Inst{20-16} = 0; let Inst{15-0} = fc; @@ -77,7 +77,7 @@ class MbrForm opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass bits<5> Rb; bits<14> disp; - let OutOperandList = (ops); + let OutOperandList = (outs); let InOperandList = OL; let Inst{25-21} = Ra; @@ -92,7 +92,7 @@ class MbrpForm opcode, bits<2> TB, dag OL, string asmstr, list patt bits<5> Rb; bits<14> disp; - let OutOperandList = (ops); + let OutOperandList = (outs); let InOperandList = OL; let Inst{25-21} = Ra; @@ -107,7 +107,7 @@ def target : Operand {} let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { class BFormN opcode, dag OL, string asmstr, InstrItinClass itin> : InstAlpha { - let OutOperandList = (ops); + let OutOperandList = (outs); let InOperandList = OL; bits<64> Opc; //dummy bits<5> Ra; @@ -122,8 +122,8 @@ let isBranch = 1, isTerminator = 1 in class BFormD opcode, string asmstr, list pattern, InstrItinClass itin> : InstAlpha { let Pattern = pattern; - let OutOperandList = (ops); - let InOperandList = (ops target:$DISP); + let OutOperandList = (outs); + let InOperandList = (ins target:$DISP); bits<5> Ra; bits<21> disp; @@ -250,7 +250,7 @@ class FPForm opcode, bits<11> fun, string asmstr, list pattern, Ins //3.3.5 class PALForm opcode, dag OL, string asmstr, InstrItinClass itin> : InstAlpha { - let OutOperandList = (ops); + let OutOperandList = (outs); let InOperandList = OL; bits<26> Function; diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index 91e58ceef4bc..d5d5e022116e 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -392,12 +392,12 @@ def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ 
GPRC:$X, immUExt8:$Y), 0 let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in { - def RETDAG : MbrForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", s_jsr>; //Return from subroutine - def RETDAGp : MbrpForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine + def RETDAG : MbrForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", s_jsr>; //Return from subroutine + def RETDAGp : MbrpForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine } let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, Ra = 31, disp = 0 in -def JMP : MbrpForm< 0x1A, 0x00, (ops GPRC:$RS), "jmp $$31,($RS),0", +def JMP : MbrpForm< 0x1A, 0x00, (ins GPRC:$RS), "jmp $$31,($RS),0", [(brind GPRC:$RS)], s_jsr>; //Jump let isCall = 1, Ra = 26, @@ -414,18 +414,18 @@ let isCall = 1, Ra = 26, Rb = 27, disp = 0, F0, F1, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in { - def JSR : MbrForm< 0x1A, 0x01, (ops ), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine + def JSR : MbrForm< 0x1A, 0x01, (ins), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine } let isCall = 1, Ra = 23, Rb = 27, disp = 0, Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in - def JSRs : MbrForm< 0x1A, 0x01, (ops ), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem + def JSRs : MbrForm< 0x1A, 0x01, (ins), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem -def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ops GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return +def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ins GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return -let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { def LDQ : MForm<0x29, 1, "ldq $RA,$DISP($RB)", [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>; def LDQr : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!gprellow", @@ -445,7 +445,7 @@ def LDWUr : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow", } -let OutOperandList = (ops), InOperandList = (ops GPRC:$RA, s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs), InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB) in { def STB : MForm<0x0E, 0, "stb $RA,$DISP($RB)", [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>; def STBr : MForm<0x0E, 0, "stb $RA,$DISP($RB)\t\t!gprellow", @@ -465,7 +465,7 @@ def STQr : MForm<0x2D, 0, "stq $RA,$DISP($RB)\t\t!gprellow", } //Load address -let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { def LDA : MForm<0x08, 0, "lda $RA,$DISP($RB)", [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>; def LDAr : MForm<0x08, 0, "lda $RA,$DISP($RB)\t\t!gprellow", @@ -476,25 +476,25 @@ def LDAHr : MForm<0x09, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh", [(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address high } -let OutOperandList = (ops), InOperandList = (ops F4RC:$RA, s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs), InOperandList = (ins F4RC:$RA, s64imm:$DISP, GPRC:$RB) in { def STS : MForm<0x26, 0, "sts $RA,$DISP($RB)", [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>; def STSr : MForm<0x26, 0, "sts 
$RA,$DISP($RB)\t\t!gprellow", [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>; } -let OutOperandList = (ops F4RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs F4RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { def LDS : MForm<0x22, 1, "lds $RA,$DISP($RB)", [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>; def LDSr : MForm<0x22, 1, "lds $RA,$DISP($RB)\t\t!gprellow", [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>; } -let OutOperandList = (ops), InOperandList = (ops F8RC:$RA, s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs), InOperandList = (ins F8RC:$RA, s64imm:$DISP, GPRC:$RB) in { def STT : MForm<0x27, 0, "stt $RA,$DISP($RB)", [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>; def STTr : MForm<0x27, 0, "stt $RA,$DISP($RB)\t\t!gprellow", [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>; } -let OutOperandList = (ops F8RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs F8RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { def LDT : MForm<0x23, 1, "ldt $RA,$DISP($RB)", [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>; def LDTr : MForm<0x23, 1, "ldt $RA,$DISP($RB)\t\t!gprellow", @@ -570,15 +570,15 @@ def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr), //load address, rellocated gpdist form -let OutOperandList = (ops GPRC:$RA), - InOperandList = (ops s16imm:$DISP, GPRC:$RB, s16imm:$NUM), +let OutOperandList = (outs GPRC:$RA), + InOperandList = (ins s16imm:$DISP, GPRC:$RB, s16imm:$NUM), mayLoad = 1 in { def LDAg : MForm<0x08, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address def LDAHg : MForm<0x09, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address } //Load quad, rellocated literal form -let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in +let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal", [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>; def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB), @@ -591,8 +591,8 @@ let OutOperandList = (outs GPRC:$RR), def STQ_C : MForm<0x2F, 0, "stq_l $RA,$DISP($RB)", [], s_ist>; def STL_C : MForm<0x2E, 0, "stl_l $RA,$DISP($RB)", [], s_ist>; } -let OutOperandList = (ops GPRC:$RA), - InOperandList = (ops s64imm:$DISP, GPRC:$RB), +let OutOperandList = (outs GPRC:$RA), + InOperandList = (ins s64imm:$DISP, GPRC:$RB), mayLoad = 1 in { def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>; def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>; @@ -611,11 +611,11 @@ def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)), //Floats -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC", [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>; -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RA, F4RC:$RB) in { +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RA, F4RC:$RB) in { def ADDS : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC", [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>; def SUBS : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC", @@ -634,11 +634,11 @@ def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", //Doubles -let OutOperandList = (ops F8RC:$RC), InOperandList = 
(ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC", [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>; -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RA, F8RC:$RB) in { +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RA, F8RC:$RB) in { def ADDT : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC", [(set F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>; def SUBT : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC", @@ -665,13 +665,13 @@ def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>; } //More CPYS forms: -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RA, F8RC:$RB) in { +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RA, F8RC:$RB) in { def CPYSTs : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC", [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>; def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>; } -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RA, F4RC:$RB) in { +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RA, F4RC:$RB) in { def CPYSSt : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC", [(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>; def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent @@ -680,7 +680,7 @@ def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", } //conditional moves, floats -let OutOperandList = (ops F4RC:$RDEST), InOperandList = (ops F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), +let OutOperandList = (outs F4RC:$RDEST), InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), isTwoAddress = 1 in { def FCMOVEQS : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if = zero def FCMOVGES : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if >= zero @@ -690,7 +690,7 @@ def FCMOVLTS : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; def FCMOVNES : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if != zero } //conditional moves, doubles -let OutOperandList = (ops F8RC:$RDEST), InOperandList = (ops F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), +let OutOperandList = (outs F8RC:$RDEST), InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), isTwoAddress = 1 in { def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>; def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>; @@ -790,33 +790,33 @@ def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), -let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F4RC:$RA), Fb = 31 in +let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F4RC:$RA), Fb = 31 in def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC", [(set GPRC:$RC, (bitconvert F4RC:$RA))], s_ftoi>; //Floating to integer move, S_floating -let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F8RC:$RA), Fb = 31 in +let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F8RC:$RA), Fb = 31 in def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC", [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC", [(set F4RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move, 
S_floating -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC", [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC", [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>; -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC", [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>; -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC", [(set F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>; -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC", [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>; -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC", [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>; @@ -829,20 +829,20 @@ def : Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf), //Branching ///////////////////////////////////////////////////////// class br_icc opc, string asmstr> - : BFormN; class br_fcc opc, string asmstr> - : BFormN; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { let Ra = 31 in def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>; -def COND_BRANCH_I : BFormN<0, (ops u64imm:$opc, GPRC:$R, target:$dst), +def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst), "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst", s_icbr>; -def COND_BRANCH_F : BFormN<0, (ops u64imm:$opc, F8RC:$R, target:$dst), +def COND_BRANCH_F : BFormN<0, (ins u64imm:$opc, F8RC:$R, target:$dst), "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst", s_fbr>; //Branches, int diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td index 88ff85f5d4f8..e3c3993a0ce3 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.td +++ b/lib/Target/Blackfin/BlackfinInstrInfo.td @@ -29,7 +29,8 @@ def BfinCallseqEnd : SDNode<"ISD::CALLSEQ_END", SDT_BfinCallSeqEnd, def SDT_BfinCall : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def BfinCall : SDNode<"BFISD::CALL", SDT_BfinCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; @@ -610,7 +611,7 @@ def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb), def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc), "$dst = $cc;", - [(set D:$dst, (zext JustCC:$cc))]>; + [/*(set D:$dst, (zext JustCC:$cc))*/]>; def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc), "$dst = cc;", []>; @@ -859,10 +860,10 @@ def : Pat<(BfinCall (i32 tglobaladdr:$dst)), def : Pat<(BfinCall (i32 texternalsym:$dst)), (CALLa texternalsym:$dst)>; -def : Pat<(sext JustCC:$cc), - (NEG (MOVECC_zext JustCC:$cc))>; -def : Pat<(anyext 
JustCC:$cc), - (MOVECC_zext JustCC:$cc)>; +//def : Pat<(sext JustCC:$cc), +// (NEG (MOVECC_zext JustCC:$cc))>; +//def : Pat<(anyext JustCC:$cc), +// (MOVECC_zext JustCC:$cc)>; def : Pat<(i16 (zext JustCC:$cc)), (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>; def : Pat<(i16 (sext JustCC:$cc)), diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp index ea9480d76255..34a8d3809ea2 100644 --- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp +++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp @@ -53,6 +53,10 @@ std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, unsigned BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const { + if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l' + || Name[2] != 'v' || Name[3] != 'm') + return 0; // All intrinsics start with 'llvm.' + #define GET_FUNCTION_RECOGNIZER #include "BlackfinGenIntrinsics.inc" #undef GET_FUNCTION_RECOGNIZER diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index b1ba0d2b9fd4..0c265adf7419 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -111,7 +111,8 @@ namespace { static char ID; explicit CWriter(formatted_raw_ostream &o) : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0), - TheModule(0), TAsm(0), TD(0), OpaqueCounter(0), NextAnonValueNumber(0) { + TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0), + NextAnonValueNumber(0) { FPCounter = 0; } @@ -147,6 +148,8 @@ namespace { delete IL; delete TD; delete Mang; + delete TCtx; + delete TAsm; FPConstantMap.clear(); TypeNames.clear(); ByValParams.clear(); diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index ad12604faa19..5068f77a85be 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -1133,16 +1133,14 @@ class XSBHInst pattern>: "xsbh\t$rDst, $rSrc", IntegerOp, pattern>; -class XSBHVecInst: - XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), - [(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>; - class XSBHInRegInst pattern>: XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc), pattern>; multiclass ExtendByteHalfword { - def v16i8: XSBHVecInst; + def v16i8: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), + [ + /*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>; def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc), [(set R16C:$rDst, (sext R8C:$rSrc))]>; def r16: XSBHInRegInst pattern>: class XSWDVecInst: XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), - [(set (out_vectype VECREG:$rDst), - (sext (out_vectype VECREG:$rSrc)))]>; + [/*(set (out_vectype VECREG:$rDst), + (sext (out_vectype VECREG:$rSrc)))*/]>; class XSWDRegInst: XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc), @@ -4146,7 +4144,7 @@ def CFSif32 : def FESDvec : RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA), "fesd\t$rT, $rA", SPrecFP, - [(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))]>; + [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>; def FESDf32 : RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA), diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 85078616a823..846c7edc02a2 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -28,7 +28,8 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq, def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall, - [SDNPHasChain, 
SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; // Operand type constraints for vector shuffle/permute operations def SDT_SPUshuffle : SDTypeProfile<1, 3, [ diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp index c8faffc84527..4931860912a1 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -57,6 +57,10 @@ std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, unsigned MBlazeIntrinsicInfo:: lookupName(const char *Name, unsigned Len) const { + if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l' + || Name[2] != 'v' || Name[3] != 'm') + return 0; // All intrinsics start with 'llvm.' + #define GET_FUNCTION_RECOGNIZER #include "MBlazeGenIntrinsics.inc" #undef GET_FUNCTION_RECOGNIZER diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index cef3697fa895..2b9e94191dba 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -29,7 +29,8 @@ def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; // Call def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, + SDNPVariadic]>; // Hi and Lo nodes are used to handle global addresses. Used on // MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index b6eceb36a473..1001d29fd8a4 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -184,7 +184,7 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { // by any chance, as we do not link in those as .bc lib. So these calls // are always external and it is safe to emit an extern. if (PAN::isMemIntrinsic(Sym->getName())) - LibcallDecls.push_back(createESName(Sym->getName())); + LibcallDecls.insert(Sym->getName()); O << *Sym; break; @@ -199,7 +199,7 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { Printname = PAN::Rename(Sname); } // Record these decls, we need to print them in asm as extern. - LibcallDecls.push_back(createESName(Printname)); + LibcallDecls.insert(Printname); } O << Printname; @@ -221,18 +221,6 @@ void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) { O << PIC16CondCodeToString((PIC16CC::CondCodes)CC); } -// This function is used to sort the decls list. -// should return true if s1 should come before s2. -static bool is_before(const char *s1, const char *s2) { - return strcmp(s1, s2) <= 0; -} - -// This is used by list::unique below. -// unique will filter out duplicates if it knows them. -static bool is_duplicate(const char *s1, const char *s2) { - return !strcmp(s1, s2); -} - /// printLibcallDecls - print the extern declarations for compiler /// intrinsics. /// @@ -241,12 +229,9 @@ void PIC16AsmPrinter::printLibcallDecls() { if (LibcallDecls.empty()) return; O << MAI->getCommentString() << "External decls for libcalls - BEGIN." <<"\n"; - // Remove duplicate entries. 
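The guard added to BlackfinIntrinsicInfo::lookupName above, and repeated for MBlaze below, is a cheap filter: any name that is not spelled "llvm.…" can return 0 before the recognizer generated into *GenIntrinsics.inc is consulted. A minimal standalone sketch of the same check; the recognizer stub below is a hypothetical stand-in, not the real generated code:

#include <cstring>

// Hypothetical stand-in for the recognizer emitted by tblgen into
// BlackfinGenIntrinsics.inc / MBlazeGenIntrinsics.inc.
static unsigned recognizeTargetIntrinsic(const char * /*Name*/,
                                         unsigned /*Len*/) {
  return 0;
}

static unsigned lookupName(const char *Name, unsigned Len) {
  // Every intrinsic is named "llvm.<...>", so anything shorter than five
  // characters or without that prefix cannot match; bail out early instead
  // of running the generated recognizer on arbitrary function names.
  if (Len < 5 || std::strncmp(Name, "llvm.", 5) != 0)
    return 0;
  return recognizeTargetIntrinsic(Name, Len);
}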
- LibcallDecls.sort(is_before); - LibcallDecls.unique(is_duplicate); - for (std::list::const_iterator I = LibcallDecls.begin(); - I != LibcallDecls.end(); I++) { + for (std::set::const_iterator I = LibcallDecls.begin(), + E = LibcallDecls.end(); I != E; I++) { O << MAI->getExternDirective() << *I << "\n"; } O << MAI->getCommentString() << "External decls for libcalls - END." <<"\n"; diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h index 519be4cf8f25..8063fcc6fc93 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h @@ -25,6 +25,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include +#include #include namespace llvm { @@ -80,7 +81,7 @@ namespace llvm { PIC16TargetLowering *PTLI; PIC16DbgInfo DbgInfo; const PIC16MCAsmInfo *PMAI; - std::list LibcallDecls; // List of extern decls. + std::set LibcallDecls; // Sorted & uniqued set of extern decls. std::vector ExternalVarDecls; std::vector ExternalVarDefs; }; diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h index 3a8bbfb5fb43..566f9207dbcd 100644 --- a/lib/Target/PIC16/PIC16Section.h +++ b/lib/Target/PIC16/PIC16Section.h @@ -45,7 +45,7 @@ namespace llvm { PIC16Section(const StringRef &name, SectionKind K, const std::string &addr, int color) - : MCSection(K), Name(name), Address(addr), Color(color) { + : MCSection(K), Name(name), Address(addr), Color(color), Size(0) { } public: diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 845cd8f3a71e..532a3ecc17d8 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -111,9 +111,11 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInFlag, SDNPOutFlag]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; @@ -124,16 +126,18 @@ def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index cac69622caf6..c4a7408a6637 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ 
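Replacing the std::list of C strings with a std::set (declared in PIC16AsmPrinter.h above) removes the need for the strcmp-based sort/unique helpers: a set keeps its keys unique and iterates them in sorted order. A small self-contained illustration of that property; the symbol names are made up for the example:

#include <iostream>
#include <set>
#include <string>

int main() {
  std::set<std::string> LibcallDecls; // sorted and uniqued automatically

  // Recording the same libcall twice stores it only once.
  LibcallDecls.insert("__mulint");
  LibcallDecls.insert("__divint");
  LibcallDecls.insert("__mulint");

  // Iteration is already in sorted order; no sort()/unique() pass needed.
  for (std::set<std::string>::const_iterator I = LibcallDecls.begin(),
         E = LibcallDecls.end(); I != E; ++I)
    std::cout << "extern " << *I << '\n';
  return 0; // prints __divint then __mulint, each exactly once
}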
b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -20,7 +20,7 @@ #include "llvm/Support/FormattedStream.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { +static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); bool isPPC64 = TheTriple.getArch() == Triple::ppc64; if (TheTriple.getOS() == Triple::Darwin) diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index d88d5080c6f7..9489580e900c 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -527,11 +527,11 @@ let Uses = [O0, O1, O2, O3, O4, O5], def JMPLrr : F3_1<2, 0b111000, (outs), (ins MEMrr:$ptr), "call $ptr", - [(call ADDRrr:$ptr)]>; + [(call ADDRrr:$ptr)]>; def JMPLri : F3_2<2, 0b111000, (outs), (ins MEMri:$ptr), "call $ptr", - [(call ADDRri:$ptr)]>; + [(call ADDRri:$ptr)]>; } // Section B.28 - Read State Register Instructions diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index a75b85de6f01..0d1af23cba14 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -47,7 +47,7 @@ def SDT_Address : SDTypeProfile<1, 1, def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; def SystemZcall : SDNode<"SystemZISD::CALL", SDT_SystemZCall, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>; def SystemZcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart, [SDNPHasChain, SDNPOutFlag]>; diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 9a168087aedc..643b3977461b 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -460,6 +460,15 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { case Type::StructTyID: // Get the layout annotation... which is lazily created on demand. 
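The SDNPVariadic property being added to call and return nodes throughout this patch (Blackfin, CellSPU, Mips, PowerPC, SystemZ above, X86 below) records that the node's operand count is not fixed: call lowering appends one register operand per argument register it passes. A hedged sketch of the operand list such a LowerCall typically builds; the opcode and variable names are placeholders, not code from this patch:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include <utility>
#include <vector>
using namespace llvm;

// TargetCallOpc stands in for the target opcode (BFISD::CALL, SPUISD::CALL,
// MipsISD::JmpLink, ...). The per-argument register operands are the part
// whose count varies from call to call, which is what SDNPVariadic advertises.
static SDValue buildVariadicCallNode(
    SelectionDAG &DAG, DebugLoc dl, SDVTList NodeTys, unsigned TargetCallOpc,
    SDValue Chain, SDValue Callee, SDValue InFlag,
    const std::vector<std::pair<unsigned, SDValue> > &RegsToPass) {
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));
  if (InFlag.getNode())
    Ops.push_back(InFlag);
  return DAG.getNode(TargetCallOpc, dl, NodeTys, &Ops[0], Ops.size());
}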
return getStructLayout(cast(Ty))->getSizeInBits(); + case Type::UnionTyID: { + const UnionType *UnTy = cast(Ty); + uint64_t Size = 0; + for (UnionType::element_iterator i = UnTy->element_begin(), + e = UnTy->element_end(); i != e; ++i) { + Size = std::max(Size, getTypeSizeInBits(*i)); + } + return Size; + } case Type::IntegerTyID: return cast(Ty)->getBitWidth(); case Type::VoidTyID: @@ -516,6 +525,17 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty); return std::max(Align, (unsigned)Layout->getAlignment()); } + case Type::UnionTyID: { + const UnionType *UnTy = cast(Ty); + unsigned Align = 1; + + // Unions need the maximum alignment of all their entries + for (UnionType::element_iterator i = UnTy->element_begin(), + e = UnTy->element_end(); i != e; ++i) { + Align = std::max(Align, (unsigned)getAlignment(*i, abi_or_pref)); + } + return Align; + } case Type::IntegerTyID: case Type::VoidTyID: AlignType = INTEGER_ALIGN; @@ -600,6 +620,11 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, // Update Ty to refer to current element Ty = STy->getElementType(FieldNo); + } else if (const UnionType *UnTy = dyn_cast(*TI)) { + unsigned FieldNo = cast(Indices[CurIDX])->getZExtValue(); + + // Offset into union is canonically 0, but type changes + Ty = UnTy->getElementType(FieldNo); } else { // Update Ty to refer to current element Ty = cast(Ty)->getElementType(); diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index a093e2db8b2e..44722b39e311 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -317,7 +317,7 @@ getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang, case dwarf::DW_EH_PE_pcrel: { // Emit a label to the streamer for the current position. This gives us // .-foo addressing. 
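The new UnionTyID cases in TargetData follow a simple rule: a union is as large as its largest member and as aligned as its most-aligned member, and indexing into a union never adds an offset, it only changes the element type being addressed. A standalone arithmetic sketch of that rule with made-up member layouts:

#include <algorithm>
#include <iostream>

int main() {
  // Hypothetical union { i8; i32; double; } on a typical 64-bit target:
  // member bit-widths 8, 32, 64 and ABI alignments 1, 4, 8 bytes.
  const unsigned long long BitWidths[] = { 8, 32, 64 };
  const unsigned Aligns[] = { 1, 4, 8 };

  unsigned long long SizeInBits = 0;
  unsigned Align = 1;
  for (int i = 0; i != 3; ++i) {
    SizeInBits = std::max(SizeInBits, BitWidths[i]); // largest member wins
    Align = std::max(Align, Aligns[i]);              // strictest alignment wins
  }

  std::cout << "size = " << SizeInBits << " bits, align = " << Align
            << " bytes; every member lives at offset 0\n";
  return 0;
}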
- MCSymbol *PCSym = getContext().GetOrCreateTemporarySymbol(); + MCSymbol *PCSym = getContext().CreateTempSymbol(); Streamer.EmitLabel(PCSym); const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); return MCBinaryExpr::CreateSub(Res, PC, getContext()); diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index dde86fbbe440..47873d14a911 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -29,6 +29,9 @@ struct X86Operand; class X86ATTAsmParser : public TargetAsmParser { MCAsmParser &Parser; +protected: + unsigned Is64Bit : 1; + private: MCAsmParser &getParser() const { return Parser; } @@ -45,6 +48,8 @@ private: bool ParseDirectiveWord(unsigned Size, SMLoc L); + void InstructionCleanup(MCInst &Inst); + /// @name Auto-generated Match Functions /// { @@ -62,7 +67,23 @@ public: virtual bool ParseDirective(AsmToken DirectiveID); }; - + +class X86_32ATTAsmParser : public X86ATTAsmParser { +public: + X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser) + : X86ATTAsmParser(T, _Parser) { + Is64Bit = false; + } +}; + +class X86_64ATTAsmParser : public X86ATTAsmParser { +public: + X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser) + : X86ATTAsmParser(T, _Parser) { + Is64Bit = true; + } +}; + } // end anonymous namespace /// @name Auto-generated Match Functions @@ -548,8 +569,10 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, Operands.size() == 3 && static_cast(Operands[1])->isImm() && isa(static_cast(Operands[1])->getImm()) && - cast(static_cast(Operands[1])->getImm())->getValue() == 1) + cast(static_cast(Operands[1])->getImm())->getValue() == 1) { + delete Operands[1]; Operands.erase(Operands.begin() + 1); + } return false; } @@ -586,12 +609,30 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } +// FIXME: Custom X86 cleanup function to implement a temporary hack to handle +// matching INCL/DECL correctly for x86_64. This needs to be replaced by a +// proper mechanism for supporting (ambiguous) feature dependent instructions. +void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) { + if (!Is64Bit) return; + + switch (Inst.getOpcode()) { + case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break; + case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break; + case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break; + case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break; + case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break; + case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break; + case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break; + case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break; + } +} + extern "C" void LLVMInitializeX86AsmLexer(); // Force static initialization. extern "C" void LLVMInitializeX86AsmParser() { - RegisterAsmParser X(TheX86_32Target); - RegisterAsmParser Y(TheX86_64Target); + RegisterAsmParser X(TheX86_32Target); + RegisterAsmParser Y(TheX86_64Target); LLVMInitializeX86AsmLexer(); } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index cbfc57a0ab74..7d29d97ce0ee 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -427,7 +427,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // MYGLOBAL + (. - PICBASE) // However, we can't generate a ".", so just emit a new label here and refer // to it. 
- MCSymbol *DotSym = OutContext.GetOrCreateTemporarySymbol(); + MCSymbol *DotSym = OutContext.CreateTempSymbol(); OutStreamer.EmitLabel(DotSym); // Now that we have emitted the label, lower the complex operand expression. diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index a31686027a09..7b7b5cb4f574 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -459,11 +459,11 @@ static void translateInstruction(MCInst &mcInst, } } -static const MCDisassembler *createX86_32Disassembler(const Target &T) { +static MCDisassembler *createX86_32Disassembler(const Target &T) { return new X86Disassembler::X86_32Disassembler; } -static const MCDisassembler *createX86_64Disassembler(const Target &T) { +static MCDisassembler *createX86_64Disassembler(const Target &T) { return new X86Disassembler::X86_64Disassembler; } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index a0a04ba4a14c..4f02ed46e38b 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1355,8 +1355,8 @@ int decodeInstruction(struct InternalInstruction* insn, insn->length = insn->readerCursor - insn->startLocation; - dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %llu", - startLoc, insn->readerCursor, insn->length); + dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", + startLoc, insn->readerCursor, insn->length); if (insn->length > 15) dbgprintf(insn, "Instruction exceeds 15-byte limit"); diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 6a4bdb58603b..2be51e1a1366 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -191,6 +191,7 @@ include "X86CallingConv.td" // Currently the X86 assembly parser only supports ATT syntax. def ATTAsmParser : AsmParser { string AsmParserClassName = "ATTAsmParser"; + string AsmParserInstCleanup = "InstructionCleanup"; int Variant = 0; // Discard comments in assembly strings. 
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index a44afc6984d3..754a20097908 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -9,39 +9,100 @@ #include "llvm/Target/TargetAsmBackend.h" #include "X86.h" +#include "X86FixupKinds.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MachObjectWriter.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmBackend.h" using namespace llvm; namespace { +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: assert(0 && "invalid fixup kind!"); + case X86::reloc_pcrel_1byte: + case FK_Data_1: return 0; + case FK_Data_2: return 1; + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + class X86AsmBackend : public TargetAsmBackend { public: X86AsmBackend(const Target &T) : TargetAsmBackend(T) {} + + void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF, + uint64_t Value) const { + unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind); + + assert(Fixup.Offset + Size <= DF.getContents().size() && + "Invalid fixup offset!"); + for (unsigned i = 0; i != Size; ++i) + DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8)); + } +}; + +class ELFX86AsmBackend : public X86AsmBackend { +public: + ELFX86AsmBackend(const Target &T) + : X86AsmBackend(T) { + HasAbsolutizedSet = true; + HasScatteredSymbols = true; + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return 0; + } + + bool isVirtualSection(const MCSection &Section) const { + const MCSectionELF &SE = static_cast(Section); + return SE.getType() == MCSectionELF::SHT_NOBITS;; + } }; class DarwinX86AsmBackend : public X86AsmBackend { public: DarwinX86AsmBackend(const Target &T) - : X86AsmBackend(T) {} + : X86AsmBackend(T) { + HasAbsolutizedSet = true; + HasScatteredSymbols = true; + } - virtual bool hasAbsolutizedSet() const { return true; } - - virtual bool hasScatteredSymbols() const { return true; } + bool isVirtualSection(const MCSection &Section) const { + const MCSectionMachO &SMO = static_cast(Section); + return (SMO.getType() == MCSectionMachO::S_ZEROFILL || + SMO.getType() == MCSectionMachO::S_GB_ZEROFILL); + } }; class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { public: DarwinX86_32AsmBackend(const Target &T) : DarwinX86AsmBackend(T) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return new MachObjectWriter(OS, /*Is64Bit=*/false); + } }; class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { public: DarwinX86_64AsmBackend(const Target &T) - : DarwinX86AsmBackend(T) {} + : DarwinX86AsmBackend(T) { + HasReliableSymbolDifference = true; + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return new MachObjectWriter(OS, /*Is64Bit=*/true); + } virtual bool doesSectionRequireSymbols(const MCSection &Section) const { // Temporary labels in the string literals sections require symbols. 
The @@ -65,7 +126,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, case Triple::Darwin: return new DarwinX86_32AsmBackend(T); default: - return new X86AsmBackend(T); + return new ELFX86AsmBackend(T); } } @@ -75,6 +136,6 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, case Triple::Darwin: return new DarwinX86_64AsmBackend(T); default: - return new X86AsmBackend(T); + return new ELFX86AsmBackend(T); } } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 96b652d39e0d..5d3edbb9f9aa 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1166,6 +1166,21 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; + case Intrinsic::stackprotector: { + // Emit code inline code to store the stack guard onto the stack. + EVT PtrTy = TLI.getPointerTy(); + + Value *Op1 = I.getOperand(1); // The guard's value. + AllocaInst *Slot = cast(I.getOperand(2)); + + // Grab the frame index. + X86AddressMode AM; + if (!X86SelectAddress(Slot, AM)) return false; + + if (!X86FastEmitStore(PtrTy, Op1, AM)) return false; + + return true; + } case Intrinsic::objectsize: { ConstantInt *CI = dyn_cast(I.getOperand(2)); const Type *Ty = I.getCalledFunction()->getReturnType(); diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h index c8dac3ce457e..a8117d47bf66 100644 --- a/lib/Target/X86/X86FixupKinds.h +++ b/lib/Target/X86/X86FixupKinds.h @@ -17,7 +17,8 @@ namespace X86 { enum Fixups { reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 - reloc_riprel_4byte // 32-bit rip-relative + reloc_riprel_4byte, // 32-bit rip-relative + reloc_riprel_4byte_movq_load // 32-bit rip-relative in movq }; } } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 405888565251..1c0ed7e6327c 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -140,6 +140,21 @@ namespace { } namespace { + class X86ISelListener : public SelectionDAG::DAGUpdateListener { + SmallSet Deletes; + public: + explicit X86ISelListener() {} + virtual void NodeDeleted(SDNode *N, SDNode *E) { + Deletes.insert(N); + } + virtual void NodeUpdated(SDNode *N) { + // Ignore updates. + } + bool IsDeleted(SDNode *N) { + return Deletes.count(N); + } + }; + //===--------------------------------------------------------------------===// /// ISel - X86 specific code to select X86 machine instructions for /// SelectionDAG operations. @@ -187,6 +202,7 @@ namespace { bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); bool MatchAddress(SDValue N, X86ISelAddressMode &AM); bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, + X86ISelListener &DeadNodes, unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, @@ -651,7 +667,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { /// returning true if it cannot be done. This just pattern matches for the /// addressing mode. 
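ApplyFixup in the X86AsmBackend hunk above maps the fixup kind to a byte count via getFixupKindLog2Size and then patches the instruction bytes in little-endian order. The same loop in isolation, operating on a plain byte buffer rather than an MCDataFragment; the example instruction and value are made up:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Write Value as a little-endian Size-byte field at Offset -- the same loop
// ApplyFixup runs over the data fragment's contents.
static void applyFixup(std::vector<uint8_t> &Bytes, unsigned Offset,
                       uint64_t Value, unsigned Size) {
  assert(Offset + Size <= Bytes.size() && "Invalid fixup offset!");
  for (unsigned i = 0; i != Size; ++i)
    Bytes[Offset + i] = uint8_t(Value >> (i * 8));
}

int main() {
  std::vector<uint8_t> Call(5, 0);
  Call[0] = 0xE8;                     // call rel32: opcode + 4-byte pcrel field
  applyFixup(Call, 1, 0x12345678, 4); // reloc_pcrel_4byte / FK_Data_4 -> 1<<2 bytes
  for (unsigned i = 0; i != Call.size(); ++i)
    std::printf("%02x ", Call[i]);    // e8 78 56 34 12
  std::printf("\n");
  return 0;
}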
bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { - if (MatchAddressRecursively(N, AM, 0)) + X86ISelListener DeadNodes; + if (MatchAddressRecursively(N, AM, DeadNodes, 0)) return true; // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has @@ -680,6 +697,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { } bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, + X86ISelListener &DeadNodes, unsigned Depth) { bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); @@ -845,7 +863,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Test if the LHS of the sub can be folded. X86ISelAddressMode Backup = AM; - if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { + if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, + DeadNodes, Depth+1) || + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + DeadNodes.IsDeleted(N.getNode())) { AM = Backup; break; } @@ -854,6 +876,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM = Backup; break; } + int Cost = 0; SDValue RHS = N.getNode()->getOperand(1); // If the RHS involves a register with multiple uses, this @@ -907,13 +930,33 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::ADD: { X86ISelAddressMode Backup = AM; - if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) && - !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1)) - return false; + if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, + DeadNodes, Depth+1)) { + if (DeadNodes.IsDeleted(N.getNode())) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return true; + if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, + DeadNodes, Depth+1)) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return DeadNodes.IsDeleted(N.getNode()); + } + + // Try again after commuting the operands. AM = Backup; - if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) && - !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) - return false; + if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, + DeadNodes, Depth+1)) { + if (DeadNodes.IsDeleted(N.getNode())) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return true; + if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, + DeadNodes, Depth+1)) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return DeadNodes.IsDeleted(N.getNode()); + } AM = Backup; // If we couldn't fold both operands into the address at the same time, @@ -935,16 +978,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (ConstantSDNode *CN = dyn_cast(N.getOperand(1))) { X86ISelAddressMode Backup = AM; uint64_t Offset = CN->getSExtValue(); + + // Check to see if the LHS & C is zero. + if (!CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) + break; + // Start with the LHS as an addr mode. - if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && + if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) && // Address could not have picked a GV address for the displacement. 
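The X86ISelListener threaded through MatchAddressRecursively above exists to answer one question: after a recursive match rewrites the DAG (the ReplaceAllUsesWith calls further down), is the node currently being matched still alive? The comments in the SUB/ADD cases spell out the rule: a successful recursive match is still unusable if it deleted N. The observer pattern in isolation, with schematic types standing in for SelectionDAG and SDNode:

#include <set>

struct Node; // opaque stand-in for SDNode

// Mirrors DAGUpdateListener::NodeDeleted: remember every node the rewrite
// step removed so the caller can check before touching it again.
class DeletionListener {
  std::set<const Node *> Deleted;
public:
  void nodeDeleted(const Node *N) { Deleted.insert(N); }
  bool isDeleted(const Node *N) const { return Deleted.count(N) != 0; }
};

// Caller-side guard used by the address matcher: even if the recursive step
// reports success (returns false), bail out when it deleted N.
static bool matchOperandWithGuard(Node *N,
                                  bool (*recurse)(Node *, DeletionListener &),
                                  DeletionListener &DeadNodes) {
  if (recurse(N, DeadNodes))
    return true;                 // recursive match failed outright
  return DeadNodes.isDeleted(N); // succeeded, but N is gone: not safe to continue
}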
AM.GV == NULL && // On x86-64, the resultant disp must fit in 32-bits. (!is64Bit || X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M, - AM.hasSymbolicDisplacement())) && - // Check to see if the LHS & C is zero. - CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { + AM.hasSymbolicDisplacement()))) { AM.Disp += Offset; return false; } @@ -1015,7 +1061,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, CurDAG->RepositionNode(N.getNode(), Shl.getNode()); Shl.getNode()->setNodeId(N.getNode()->getNodeId()); } - CurDAG->ReplaceAllUsesWith(N, Shl); + CurDAG->ReplaceAllUsesWith(N, Shl, &DeadNodes); AM.IndexReg = And; AM.Scale = (1 << ScaleLog); return false; @@ -1066,7 +1112,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId()); } - CurDAG->ReplaceAllUsesWith(N, NewSHIFT); + CurDAG->ReplaceAllUsesWith(N, NewSHIFT, &DeadNodes); AM.Scale = 1 << ShiftCst; AM.IndexReg = NewAND; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7d2140bf4d9d..704f9c65a59e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2310,6 +2310,28 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isCalleeStructRet || isCallerStructRet) return false; + // If the call result is in ST0 / ST1, it needs to be popped off the x87 stack. + // Therefore if it's not used by the call it is not safe to optimize this into + // a sibcall. + bool Unused = false; + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + if (!Ins[i].Used) { + Unused = true; + break; + } + } + if (Unused) { + SmallVector RVLocs; + CCState CCInfo(CalleeCC, false, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCInfo.AnalyzeCallResult(Ins, RetCC_X86); + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) + return false; + } + } + // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 4262c0ac0514..8cbb756441e6 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -144,7 +144,7 @@ let isCall = 1 in // NOTE: this pattern doesn't match "X86call imm", because we do not know // that the offset between an arbitrary immediate and the call will fit in // the 32-bit pcrel field that we have. - def CALL64pcrel32 : Ii32<0xE8, RawFrm, + def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), "call{q}\t$dst", []>, Requires<[In64BitMode, NotWin64]>; @@ -511,6 +511,14 @@ def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst), [(set GR64:$dst, (add GR64:$src1, GR64:$src2)), (implicit EFLAGS)]>; +// These are alternate spellings for use by the disassembler, we mark them as +// code gen only to ensure they aren't matched by the assembler. 
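The new check in IsEligibleForTailCallOptimization above encodes an x87 calling-convention detail: a value returned in ST0/ST1 sits on the fp register stack and must be popped by the caller, so if the caller never reads the result there has to be caller-side code left after the call to discard it, which a sibcall would eliminate. A tiny illustrative caller of the kind the check refuses to tail-call; hypothetical source, not from the patch:

// produce() returns a long double in ST0. consumer() ignores the result, so
// something after the call must still pop ST0; turning the call into a tail
// jump would leave the x87 stack unbalanced, hence the new bail-out.
long double produce();

void consumer() {
  produce(); // return value unused: not eligible for sibcall
}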
+let isCodeGenOnly = 1 in { + def ADD64rr_alt : RI<0x03, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", []>; +} + // Register-Integer Addition def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), @@ -531,12 +539,6 @@ def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))), (implicit EFLAGS)]>; -// Register-Register Addition - Equivalent to the normal rr form (ADD64rr), but -// differently encoded. -def ADD64mrmrr : RI<0x03, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", []>; - } // isTwoAddress // Memory-Register Addition @@ -1225,59 +1227,59 @@ let Defs = [EFLAGS] in { def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i32imm:$src), "test{q}\t{$src, %rax|%rax, $src}", []>; let isCommutable = 1 in -def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), +def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR64:$src1, GR64:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR64:$src1, GR64:$src2), 0))]>; def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR64:$src1, (loadi64 addr:$src2)), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR64:$src1, (loadi64 addr:$src2)), + 0))]>; def TEST64ri32 : RIi32<0xF7, MRM0r, (outs), (ins GR64:$src1, i64i32imm:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR64:$src1, i64immSExt32:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR64:$src1, i64immSExt32:$src2), + 0))]>; def TEST64mi32 : RIi32<0xF7, MRM0m, (outs), (ins i64mem:$src1, i64i32imm:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and (loadi64 addr:$src1), + i64immSExt32:$src2), 0))]>; def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i32imm:$src), "cmp{q}\t{$src, %rax|%rax, $src}", []>; def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR64:$src1, GR64:$src2), - (implicit EFLAGS)]>; -def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", []>; + [(set EFLAGS, (X86cmp GR64:$src1, GR64:$src2))]>; + +// These are alternate spellings for use by the disassembler, we mark them as +// code gen only to ensure they aren't matched by the assembler. 
+let isCodeGenOnly = 1 in { + def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), + "cmp{q}\t{$src2, $src1|$src1, $src2}", []>; +} + def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi64 addr:$src1), GR64:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi64 addr:$src1), GR64:$src2))]>; def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR64:$src1, (loadi64 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR64:$src1, (loadi64 addr:$src2)))]>; def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR64:$src1, i64immSExt8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt8:$src2))]>; def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR64:$src1, i64immSExt32:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt32:$src2))]>; def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi64 addr:$src1), i64immSExt8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi64 addr:$src1), + i64immSExt8:$src2))]>; def CMP64mi32 : RIi32<0x81, MRM7m, (outs), (ins i64mem:$src1, i64i32imm:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi64 addr:$src1), i64immSExt32:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi64 addr:$src1), + i64immSExt32:$src2))]>; } // Defs = [EFLAGS] // Bit tests. @@ -1285,8 +1287,7 @@ def CMP64mi32 : RIi32<0x81, MRM7m, (outs), let Defs = [EFLAGS] in { def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR64:$src1, GR64:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB; // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's @@ -1300,15 +1301,14 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR64:$src1, i64immSExt8:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB; // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. def BT64mi8 : Ii8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(X86bt (loadi64 addr:$src1), i64immSExt8:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt (loadi64 addr:$src1), + i64immSExt8:$src2))]>, TB; def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; @@ -1938,7 +1938,7 @@ def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), // Comparisons. 
// TEST R,R is smaller than CMP R,0 -def : Pat<(parallel (X86cmp GR64:$src1, 0), (implicit EFLAGS)), +def : Pat<(X86cmp GR64:$src1, 0), (TEST64rr GR64:$src1, GR64:$src1)>; // Conditional moves with folded loads with operands swapped and conditions @@ -2233,21 +2233,6 @@ def : Pat<(parallel (X86add_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (ADD64rm GR64:$src1, addr:$src2)>; -// Memory-Register Addition with EFLAGS result -def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD64mr addr:$dst, GR64:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), - i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD64mi32 addr:$dst, i64immSExt32:$src2)>; - // Register-Register Subtraction with EFLAGS result def : Pat<(parallel (X86sub_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), @@ -2266,24 +2251,6 @@ def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt32:$src2), (implicit EFLAGS)), (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; -// Memory-Register Subtraction with EFLAGS result -def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB64mr addr:$dst, GR64:$src2)>; - -// Memory-Integer Subtraction with EFLAGS result -def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), - i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), - i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB64mi32 addr:$dst, i64immSExt32:$src2)>; - // Register-Register Signed Integer Multiplication with EFLAGS result def : Pat<(parallel (X86smul_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), @@ -2313,36 +2280,18 @@ def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt32:$src2), // INC and DEC with EFLAGS result. Note that these do not set CF. 
def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC64_16m addr:$dst)>, Requires<[In64BitMode]>; def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC64_16m addr:$dst)>, Requires<[In64BitMode]>; def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC64_32m addr:$dst)>, Requires<[In64BitMode]>; def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC64_32m addr:$dst)>, Requires<[In64BitMode]>; def : Pat<(parallel (X86inc_flag GR64:$src), (implicit EFLAGS)), (INC64r GR64:$src)>; -def : Pat<(parallel (store (i64 (X86inc_flag (loadi64 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC64m addr:$dst)>; def : Pat<(parallel (X86dec_flag GR64:$src), (implicit EFLAGS)), (DEC64r GR64:$src)>; -def : Pat<(parallel (store (i64 (X86dec_flag (loadi64 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC64m addr:$dst)>; // Register-Register Logical Or with EFLAGS result def : Pat<(parallel (X86or_flag GR64:$src1, GR64:$src2), @@ -2362,20 +2311,6 @@ def : Pat<(parallel (X86or_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (OR64rm GR64:$src1, addr:$src2)>; -// Memory-Register Logical Or with EFLAGS result -def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR64mr addr:$dst, GR64:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR64mi32 addr:$dst, i64immSExt32:$src2)>; - // Register-Register Logical XOr with EFLAGS result def : Pat<(parallel (X86xor_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), @@ -2394,21 +2329,6 @@ def : Pat<(parallel (X86xor_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (XOR64rm GR64:$src1, addr:$src2)>; -// Memory-Register Logical XOr with EFLAGS result -def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR64mr addr:$dst, GR64:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), - i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR64mi32 addr:$dst, i64immSExt32:$src2)>; - // Register-Register Logical And with EFLAGS result def : Pat<(parallel (X86and_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), @@ -2427,21 +2347,6 @@ def : Pat<(parallel (X86and_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (AND64rm GR64:$src1, addr:$src2)>; -// Memory-Register Logical And with EFLAGS result -def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND64mr addr:$dst, GR64:$src2)>; -def : 
Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), - i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND64mi32 addr:$dst, i64immSExt32:$src2)>; - //===----------------------------------------------------------------------===// // X86-64 SSE Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index b7309180bbbc..e6d1fee16f61 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -562,15 +562,13 @@ def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, []>; // FPSW = cmp ST(0) with ST(i) +// CC = ST(0) cmp ST(i) def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, - [(X86cmp RFP32:$lhs, RFP32:$rhs), - (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i) + [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>; def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, - [(X86cmp RFP64:$lhs, RFP64:$rhs), - (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i) + [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>; def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, - [(X86cmp RFP80:$lhs, RFP80:$rhs), - (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i) + [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>; } let Defs = [EFLAGS], Uses = [ST0] in { diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 1225b683b745..c80a18dbf60b 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -21,8 +21,7 @@ def SDTIntShiftDOp: SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>]>; -// FIXME: Should be modelled as returning i32 -def SDTX86CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; +def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>; def SDTX86Cmov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, @@ -120,12 +119,12 @@ def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; def X86vastart_save_xmm_regs : SDNode<"X86ISD::VASTART_SAVE_XMM_REGS", SDT_X86VASTART_SAVE_XMM_REGS, - [SDNPHasChain]>; + [SDNPHasChain, SDNPVariadic]>; def X86callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart, @@ -135,7 +134,8 @@ def X86callseq_end : [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def X86call : SDNode<"X86ISD::CALL", SDT_X86Call, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, + SDNPVariadic]>; def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr, [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>; @@ -158,7 +158,7 @@ def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags, [SDNPCommutative]>; @@ -661,9 +661,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // Loop instructions -def LOOP : I<0xE2, RawFrm, (ins 
brtarget8:$dst), (outs), "loop\t$dst", []>; -def LOOPE : I<0xE1, RawFrm, (ins brtarget8:$dst), (outs), "loope\t$dst", []>; -def LOOPNE : I<0xE0, RawFrm, (ins brtarget8:$dst), (outs), "loopne\t$dst", []>; +def LOOP : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>; +def LOOPE : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>; +def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>; //===----------------------------------------------------------------------===// // Call Instructions... @@ -3200,17 +3200,16 @@ let Defs = [EFLAGS] in { let isCommutable = 1 in { // TEST X, Y --> TEST Y, X def TEST8rr : I<0x84, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2), "test{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR8:$src1, GR8:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and_su GR8:$src1, GR8:$src2), 0))]>; def TEST16rr : I<0x85, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2), "test{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR16:$src1, GR16:$src2), 0), - (implicit EFLAGS)]>, + [(set EFLAGS, (X86cmp (and_su GR16:$src1, GR16:$src2), + 0))]>, OpSize; def TEST32rr : I<0x85, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2), "test{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR32:$src1, GR32:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and_su GR32:$src1, GR32:$src2), + 0))]>; } def TEST8i8 : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src), @@ -3222,48 +3221,46 @@ def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src), def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2), "test{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR8:$src1, (loadi8 addr:$src2)), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR8:$src1, (loadi8 addr:$src2)), + 0))]>; def TEST16rm : I<0x85, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2), "test{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR16:$src1, (loadi16 addr:$src2)), 0), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (and GR16:$src1, + (loadi16 addr:$src2)), 0))]>, OpSize; def TEST32rm : I<0x85, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2), "test{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR32:$src1, (loadi32 addr:$src2)), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR32:$src1, + (loadi32 addr:$src2)), 0))]>; def TEST8ri : Ii8 <0xF6, MRM0r, // flags = GR8 & imm8 (outs), (ins GR8:$src1, i8imm:$src2), "test{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR8:$src1, imm:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and_su GR8:$src1, imm:$src2), 0))]>; def TEST16ri : Ii16<0xF7, MRM0r, // flags = GR16 & imm16 (outs), (ins GR16:$src1, i16imm:$src2), "test{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR16:$src1, imm:$src2), 0), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (and_su GR16:$src1, imm:$src2), 0))]>, + OpSize; def TEST32ri : Ii32<0xF7, MRM0r, // flags = GR32 & imm32 (outs), (ins GR32:$src1, i32imm:$src2), "test{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR32:$src1, imm:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and_su GR32:$src1, imm:$src2), 0))]>; def TEST8mi : Ii8 <0xF6, MRM0m, // flags = [mem8] & imm8 (outs), (ins i8mem:$src1, i8imm:$src2), "test{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and (loadi8 addr:$src1), imm:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and (loadi8 addr:$src1), imm:$src2), + 0))]>; def TEST16mi : Ii16<0xF7, MRM0m, // flags = [mem16] & imm16 (outs), (ins i16mem:$src1, i16imm:$src2), 
"test{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and (loadi16 addr:$src1), imm:$src2), 0), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (and (loadi16 addr:$src1), imm:$src2), + 0))]>, OpSize; def TEST32mi : Ii32<0xF7, MRM0m, // flags = [mem32] & imm32 (outs), (ins i32mem:$src1, i32imm:$src2), "test{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and (loadi32 addr:$src1), imm:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and (loadi32 addr:$src1), imm:$src2), + 0))]>; } // Defs = [EFLAGS] @@ -3477,45 +3474,41 @@ def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src), def CMP8rr : I<0x38, MRMDestReg, (outs), (ins GR8 :$src1, GR8 :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR8:$src1, GR8:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR8:$src1, GR8:$src2))]>; def CMP16rr : I<0x39, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR16:$src1, GR16:$src2), (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp GR16:$src1, GR16:$src2))]>, OpSize; def CMP32rr : I<0x39, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR32:$src1, GR32:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR32:$src1, GR32:$src2))]>; def CMP8mr : I<0x38, MRMDestMem, (outs), (ins i8mem :$src1, GR8 :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi8 addr:$src1), GR8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi8 addr:$src1), GR8:$src2))]>; def CMP16mr : I<0x39, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi16 addr:$src1), GR16:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (loadi16 addr:$src1), GR16:$src2))]>, + OpSize; def CMP32mr : I<0x39, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi32 addr:$src1), GR32:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi32 addr:$src1), GR32:$src2))]>; def CMP8rm : I<0x3A, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR8:$src1, (loadi8 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR8:$src1, (loadi8 addr:$src2)))]>; def CMP16rm : I<0x3B, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR16:$src1, (loadi16 addr:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp GR16:$src1, (loadi16 addr:$src2)))]>, + OpSize; def CMP32rm : I<0x3B, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR32:$src1, (loadi32 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR32:$src1, (loadi32 addr:$src2)))]>; // These are alternate spellings for use by the disassembler, we mark them as // code gen only to ensure they aren't matched by the assembler. 
@@ -3531,51 +3524,47 @@ let isCodeGenOnly = 1 in { def CMP8ri : Ii8<0x80, MRM7r, (outs), (ins GR8:$src1, i8imm:$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR8:$src1, imm:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR8:$src1, imm:$src2))]>; def CMP16ri : Ii16<0x81, MRM7r, (outs), (ins GR16:$src1, i16imm:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR16:$src1, imm:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp GR16:$src1, imm:$src2))]>, OpSize; def CMP32ri : Ii32<0x81, MRM7r, (outs), (ins GR32:$src1, i32imm:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR32:$src1, imm:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR32:$src1, imm:$src2))]>; def CMP8mi : Ii8 <0x80, MRM7m, (outs), (ins i8mem :$src1, i8imm :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi8 addr:$src1), imm:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi8 addr:$src1), imm:$src2))]>; def CMP16mi : Ii16<0x81, MRM7m, (outs), (ins i16mem:$src1, i16imm:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi16 addr:$src1), imm:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (loadi16 addr:$src1), imm:$src2))]>, + OpSize; def CMP32mi : Ii32<0x81, MRM7m, (outs), (ins i32mem:$src1, i32imm:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi32 addr:$src1), imm:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi32 addr:$src1), imm:$src2))]>; def CMP16ri8 : Ii8<0x83, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR16:$src1, i16immSExt8:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp GR16:$src1, i16immSExt8:$src2))]>, + OpSize; def CMP16mi8 : Ii8<0x83, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi16 addr:$src1), i16immSExt8:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (loadi16 addr:$src1), + i16immSExt8:$src2))]>, OpSize; def CMP32mi8 : Ii8<0x83, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi32 addr:$src1), i32immSExt8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi32 addr:$src1), + i32immSExt8:$src2))]>; def CMP32ri8 : Ii8<0x83, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR32:$src1, i32immSExt8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR32:$src1, i32immSExt8:$src2))]>; } // Defs = [EFLAGS] // Bit tests. @@ -3583,12 +3572,10 @@ def CMP32ri8 : Ii8<0x83, MRM7r, let Defs = [EFLAGS] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR16:$src1, GR16:$src2), - (implicit EFLAGS)]>, OpSize, TB; + [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>, OpSize, TB; def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR32:$src1, GR32:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB; // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. 
From ISel's @@ -3610,23 +3597,22 @@ def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR16:$src1, i16immSExt8:$src2), - (implicit EFLAGS)]>, OpSize, TB; + [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>, + OpSize, TB; def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR32:$src1, i32immSExt8:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB; // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - [(X86bt (loadi16 addr:$src1), i16immSExt8:$src2), - (implicit EFLAGS)]>, OpSize, TB; + [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2)) + ]>, OpSize, TB; def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - [(X86bt (loadi32 addr:$src1), i32immSExt8:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2)) + ]>, TB; def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; @@ -4401,11 +4387,11 @@ def : Pat<(subc GR32:$src1, i32immSExt8:$src2), // Comparisons. // TEST R,R is smaller than CMP R,0 -def : Pat<(parallel (X86cmp GR8:$src1, 0), (implicit EFLAGS)), +def : Pat<(X86cmp GR8:$src1, 0), (TEST8rr GR8:$src1, GR8:$src1)>; -def : Pat<(parallel (X86cmp GR16:$src1, 0), (implicit EFLAGS)), +def : Pat<(X86cmp GR16:$src1, 0), (TEST16rr GR16:$src1, GR16:$src1)>; -def : Pat<(parallel (X86cmp GR32:$src1, 0), (implicit EFLAGS)), +def : Pat<(X86cmp GR32:$src1, 0), (TEST32rr GR32:$src1, GR32:$src1)>; // Conditional moves with folded loads with operands swapped and conditions @@ -4799,42 +4785,6 @@ def : Pat<(parallel (X86add_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register Addition with EFLAGS result -def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer Addition with EFLAGS result -def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD32mi8 addr:$dst, i32immSExt8:$src2)>; - // Register-Register Subtraction with EFLAGS 
result def : Pat<(parallel (X86sub_flag GR8:$src1, GR8:$src2), (implicit EFLAGS)), @@ -4874,43 +4824,6 @@ def : Pat<(parallel (X86sub_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register Subtraction with EFLAGS result -def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer Subtraction with EFLAGS result -def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB32mi8 addr:$dst, i32immSExt8:$src2)>; - - // Register-Register Signed Integer Multiply with EFLAGS result def : Pat<(parallel (X86smul_flag GR16:$src1, GR16:$src2), (implicit EFLAGS)), @@ -4969,36 +4882,18 @@ def : Pat<(parallel (X86smul_flag GR32:$src1, 2), // INC and DEC with EFLAGS result. Note that these do not set CF. 
def : Pat<(parallel (X86inc_flag GR8:$src), (implicit EFLAGS)), (INC8r GR8:$src)>; -def : Pat<(parallel (store (i8 (X86inc_flag (loadi8 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC8m addr:$dst)>; def : Pat<(parallel (X86dec_flag GR8:$src), (implicit EFLAGS)), (DEC8r GR8:$src)>; -def : Pat<(parallel (store (i8 (X86dec_flag (loadi8 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC8m addr:$dst)>; def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)), (INC16r GR16:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC16m addr:$dst)>, Requires<[In32BitMode]>; def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)), (DEC16r GR16:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC16m addr:$dst)>, Requires<[In32BitMode]>; def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)), (INC32r GR32:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC32m addr:$dst)>, Requires<[In32BitMode]>; def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)), (DEC32r GR32:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC32m addr:$dst)>, Requires<[In32BitMode]>; // Register-Register Or with EFLAGS result def : Pat<(parallel (X86or_flag GR8:$src1, GR8:$src2), @@ -5039,42 +4934,6 @@ def : Pat<(parallel (X86or_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (OR32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register Or with EFLAGS result -def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer Or with EFLAGS result -def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR32mi8 addr:$dst, i32immSExt8:$src2)>; - // Register-Register XOr with EFLAGS result def : Pat<(parallel (X86xor_flag GR8:$src1, GR8:$src2), (implicit EFLAGS)), @@ -5114,42 +4973,6 @@ def : Pat<(parallel (X86xor_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register XOr with EFLAGS result -def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR16mr 
addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer XOr with EFLAGS result -def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR32mi8 addr:$dst, i32immSExt8:$src2)>; - // Register-Register And with EFLAGS result def : Pat<(parallel (X86and_flag GR8:$src1, GR8:$src2), (implicit EFLAGS)), @@ -5189,42 +5012,6 @@ def : Pat<(parallel (X86and_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register And with EFLAGS result -def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer And with EFLAGS result -def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND32mi8 addr:$dst, i32immSExt8:$src2)>; - // -disable-16bit support. 
def : Pat<(truncstorei16 (i16 imm:$src), addr:$dst), (MOV16mi addr:$dst, imm:$src)>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 18f9e52d470d..720b663a6aa7 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -513,11 +513,10 @@ let mayLoad = 1 in let Defs = [EFLAGS] in { def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", - [(X86cmp FR32:$src1, FR32:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp FR32:$src1, FR32:$src2))]>; def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", - [(X86cmp FR32:$src1, (loadf32 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>; def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "comiss\t{$src2, $src1|$src1, $src2}", []>; @@ -546,21 +545,21 @@ let Constraints = "$src1 = $dst" in { let Defs = [EFLAGS] in { def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", - [(X86ucomi (v4f32 VR128:$src1), VR128:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), + VR128:$src2))]>; def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", - [(X86ucomi (v4f32 VR128:$src1), (load addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), + (load addr:$src2)))]>; def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "comiss\t{$src2, $src1|$src1, $src2}", - [(X86comi (v4f32 VR128:$src1), VR128:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86comi (v4f32 VR128:$src1), + VR128:$src2))]>; def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "comiss\t{$src2, $src1|$src1, $src2}", - [(X86comi (v4f32 VR128:$src1), (load addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86comi (v4f32 VR128:$src1), + (load addr:$src2)))]>; } // Defs = [EFLAGS] // Aliases of packed SSE1 instructions for scalar use. These all have names @@ -1298,11 +1297,10 @@ let mayLoad = 1 in let Defs = [EFLAGS] in { def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2), "ucomisd\t{$src2, $src1|$src1, $src2}", - [(X86cmp FR64:$src1, FR64:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp FR64:$src1, FR64:$src2))]>; def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2), "ucomisd\t{$src2, $src1|$src1, $src2}", - [(X86cmp FR64:$src1, (loadf64 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp FR64:$src1, (loadf64 addr:$src2)))]>; } // Defs = [EFLAGS] // Aliases to match intrinsics which expect XMM operand(s). 
@@ -1324,21 +1322,21 @@ let Constraints = "$src1 = $dst" in { let Defs = [EFLAGS] in { def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ucomisd\t{$src2, $src1|$src1, $src2}", - [(X86ucomi (v2f64 VR128:$src1), (v2f64 VR128:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), + VR128:$src2))]>; def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), "ucomisd\t{$src2, $src1|$src1, $src2}", - [(X86ucomi (v2f64 VR128:$src1), (load addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), + (load addr:$src2)))]>; def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "comisd\t{$src2, $src1|$src1, $src2}", - [(X86comi (v2f64 VR128:$src1), (v2f64 VR128:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86comi (v2f64 VR128:$src1), + VR128:$src2))]>; def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "comisd\t{$src2, $src1|$src1, $src2}", - [(X86comi (v2f64 VR128:$src1), (load addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86comi (v2f64 VR128:$src1), + (load addr:$src2)))]>; } // Defs = [EFLAGS] // Aliases of packed SSE2 instructions for scalar use. These all have names @@ -3825,54 +3823,65 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), let Constraints = "$src1 = $dst" in { def CRC32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i8mem:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{b} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_8 GR32:$src1, - (load addr:$src2)))]>, OpSize; + (load addr:$src2)))]>; def CRC32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR8:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{b} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, - (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>, - OpSize; + (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>; def CRC32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i16mem:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{w} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_16 GR32:$src1, (load addr:$src2)))]>, OpSize; def CRC32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR16:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{w} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_16 GR32:$src1, GR16:$src2))]>, OpSize; def CRC32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{l} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_32 GR32:$src1, - (load addr:$src2)))]>, OpSize; + (load addr:$src2)))]>; def CRC32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{l} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, - (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>, - OpSize; - def CRC64m64 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>; + def CRC64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$src1, i8mem:$src2), + "crc32{b} \t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, - (int_x86_sse42_crc32_64 GR64:$src1, + (int_x86_sse42_crc64_8 GR64:$src1, (load addr:$src2)))]>, - OpSize, REX_W; - def CRC64r64 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - 
"crc32 \t{$src2, $src1|$src1, $src2}", + REX_W; + def CRC64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR8:$src2), + "crc32{b} \t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, - (int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>, - OpSize, REX_W; + (int_x86_sse42_crc64_8 GR64:$src1, GR8:$src2))]>, + REX_W; + def CRC64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$src1, i64mem:$src2), + "crc32{q} \t{$src2, $src1|$src1, $src2}", + [(set GR64:$dst, + (int_x86_sse42_crc64_64 GR64:$src1, + (load addr:$src2)))]>, + REX_W; + def CRC64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "crc32{q} \t{$src2, $src1|$src1, $src2}", + [(set GR64:$dst, + (int_x86_sse42_crc64_64 GR64:$src1, GR64:$src2))]>, + REX_W; } // String/text processing instructions. diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 3f18696d8555..a9681e6670fc 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -38,14 +38,15 @@ public: ~X86MCCodeEmitter() {} unsigned getNumFixupKinds() const { - return 3; + return 4; } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo Infos[] = { - { "reloc_pcrel_4byte", 0, 4 * 8 }, - { "reloc_pcrel_1byte", 0, 1 * 8 }, - { "reloc_riprel_4byte", 0, 4 * 8 } + { "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, + { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel }, + { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, + { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel } }; if (Kind < FirstTargetFixupKind) @@ -165,7 +166,8 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, // If the fixup is pc-relative, we need to bias the value to be relative to // the start of the field, not the end of the field. if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || - FixupKind == MCFixupKind(X86::reloc_riprel_4byte)) + FixupKind == MCFixupKind(X86::reloc_riprel_4byte) || + FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load)) ImmOffset -= 4; if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) ImmOffset -= 1; @@ -197,6 +199,15 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, "Invalid rip-relative address"); EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); + unsigned FixupKind = X86::reloc_riprel_4byte; + + // movq loads are handled with a special relocation form which allows the + // linker to eliminate some loads for GOT references which end up in the + // same linkage unit. + if (MI.getOpcode() == X86::MOV64rm || + MI.getOpcode() == X86::MOV64rm_TC) + FixupKind = X86::reloc_riprel_4byte_movq_load; + // rip-relative addressing is actually relative to the *next* instruction. // Since an immediate can follow the mod/rm byte for an instruction, this // means that we need to bias the immediate field of the instruction with @@ -204,7 +215,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // expression to emit. int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; - EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_riprel_4byte), + EmitImmediate(Disp, 4, MCFixupKind(FixupKind), CurByte, OS, Fixups, -ImmSize); return; } @@ -269,7 +280,10 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Emit the normal disp32 encoding. 
EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS); ForceDisp32 = true; - } else if (Disp.getImm() == 0 && BaseReg != X86::EBP) { + } else if (Disp.getImm() == 0 && + // Base reg can't be anything that ends up with '5' as the base + // reg, it is the magic [*] nomenclature that indicates no base. + BaseRegNo != N86::EBP) { // Emit no displacement ModR/M byte EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS); } else if (isDisp8(Disp.getImm())) { diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index f907614c86af..cd568164b251 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -366,12 +366,3 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, if (StackAlignment) stackAlignment = StackAlignment; } - -bool X86Subtarget::enablePostRAScheduler( - CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtarget::ANTIDEP_CRITICAL; - CriticalPathRCs.clear(); - return OptLevel >= CodeGenOpt::Aggressive; -} diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 50338d33acf9..56220db3b215 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -230,12 +230,6 @@ public: /// indicating the number of scheduling cycles of backscheduling that /// should be attempted. unsigned getSpecialAddressLatency() const; - - /// enablePostRAScheduler - X86 target is enabling post-alloc scheduling - /// at 'More' optimization level. - bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 56ddaf858a6c..f13e6f352564 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -22,7 +22,7 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { +static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { case Triple::Darwin: diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 46805d59ae9d..2e9a1e5d6e8b 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -29,7 +29,8 @@ include "XCoreInstrFormats.td" // Call def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 7b1e9c0efdd1..d8e97a26ada8 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -622,12 +622,12 @@ static bool AllUsesOfValueWillTrapIfNull(Value *V, return false; // Storing the value. 
} } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { - if (CI->getOperand(0) != V) { + if (CI->getCalledValue() != V) { //cerr << "NONTRAPPING USE: " << **UI; return false; // Not calling the ptr } } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { - if (II->getOperand(0) != V) { + if (II->getCalledValue() != V) { //cerr << "NONTRAPPING USE: " << **UI; return false; // Not calling the ptr } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index bdb46ebd1a64..65f2e15d2780 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -820,7 +820,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // We cannot remove an invoke, because it would change the CFG, just // change the callee to a null pointer. - cast<InvokeInst>(OldCall)->setOperand(0, + cast<InvokeInst>(OldCall)->setCalledFunction( Constant::getNullValue(CalleeF->getType())); return 0; } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index de93e9f621fe..eb04d9401fbb 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -307,6 +307,10 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, } } } + + // The insertion point instruction may have been deleted; clear it out + // so that the rewriter doesn't trip over it later. + Rewriter.clearInsertPoint(); } void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 05027ae528e3..22f3628cf7c0 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -1400,6 +1400,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setOnlyReadsMemory(F); setDoesNotThrow(F); setDoesNotCapture(F, 1); + } else if (Name == "strchr" || + Name == "strrchr") { + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isIntegerTy()) + continue; + setOnlyReadsMemory(F); + setDoesNotThrow(F); } else if (Name == "strcpy" || Name == "stpcpy" || Name == "strcat" || @@ -1428,7 +1436,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } else if (Name == "strcmp" || Name == "strspn" || Name == "strncmp" || - Name ==" strcspn" || + Name == "strcspn" || Name == "strcoll" || Name == "strcasecmp" || Name == "strncasecmp") { diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index fd742413abf1..0eb9f020dc9a 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -1027,6 +1027,15 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, return; } + if (const ConstantUnion *CU = dyn_cast<ConstantUnion>(CV)) { + Out << "{ "; + TypePrinter.print(CU->getOperand(0)->getType(), Out); + Out << ' '; + WriteAsOperandInternal(Out, CU->getOperand(0), &TypePrinter, Machine); + Out << " }"; + return; + } + if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { const Type *ETy = CP->getType()->getElementType(); assert(CP->getNumOperands() > 0 && diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 721e96a0f552..f1413825697a 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -1623,10 +1623,6 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { MDNode *MD = cast<MDNode>(CI.getOperand(1)); Assert1(MD->getNumOperands() == 1, "invalid llvm.dbg.declare intrinsic call 2", &CI); - if (MD->getOperand(0)) - if (Constant *C = dyn_cast<Constant>(MD->getOperand(0))) - Assert1(C && !isa<ConstantPointerNull>(C), - "invalid
llvm.dbg.declare intrinsic call 3", &CI); } break; case Intrinsic::memcpy: case Intrinsic::memmove: diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll index a4e76859d16d..f17d059ed083 100644 --- a/test/CodeGen/ARM/2009-10-27-double-align.ll +++ b/test/CodeGen/ARM/2009-10-27-double-align.ll @@ -4,8 +4,8 @@ define arm_aapcscc void @g() { entry: -;CHECK: [sp, #+8] -;CHECK: [sp, #+12] +;CHECK: [sp, #8] +;CHECK: [sp, #12] ;CHECK: [sp] tail call arm_aapcscc void (i8*, ...)* @f(i8* getelementptr ([1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00) ret void diff --git a/test/CodeGen/ARM/2009-10-30.ll b/test/CodeGen/ARM/2009-10-30.ll index 90a5bd2a7579..87d1a8b9e9a2 100644 --- a/test/CodeGen/ARM/2009-10-30.ll +++ b/test/CodeGen/ARM/2009-10-30.ll @@ -6,7 +6,7 @@ define void @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) { entry: ;CHECK: sub sp, sp, #4 ;CHECK: add r{{[0-9]+}}, sp, #8 -;CHECK: str r{{[0-9]+}}, [sp], #+4 +;CHECK: str r{{[0-9]+}}, [sp], #4 ;CHECK: bx lr %ap = alloca i8*, align 4 %ap1 = bitcast i8** %ap to i8* diff --git a/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll new file mode 100644 index 000000000000..31525eff4461 --- /dev/null +++ b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s +; RUN: llc < %s -mtriple=armv5-unknown-eabi | FileCheck %s +; RUN: llc < %s -mtriple=armv6-unknown-eabi | FileCheck %s + +define i32 @bar(i32 %a) nounwind { +entry: + %0 = tail call i32 @foo(i32 %a) nounwind ; [#uses=1] + %1 = add nsw i32 %0, 3 ; [#uses=1] +; CHECK: ldmia sp!, {r11, pc} + ret i32 %1 +} + +declare i32 @foo(i32) diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll index 52ab8717c15f..fb0f8ff87906 100644 --- a/test/CodeGen/ARM/arm-negative-stride.ll +++ b/test/CodeGen/ARM/arm-negative-stride.ll @@ -5,7 +5,7 @@ define void @test(i32* %P, i32 %A, i32 %i) nounwind { entry: -; CHECK: str r1, [{{r.*}}, +{{r.*}}, lsl #2] +; CHECK: str r1, [{{r.*}}, {{r.*}}, lsl #2] icmp eq i32 %i, 0 ; :0 [#uses=1] br i1 %0, label %return, label %bb diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll index 886c0d55cfa8..adb449739743 100644 --- a/test/CodeGen/ARM/globals.ll +++ b/test/CodeGen/ARM/globals.ll @@ -41,7 +41,7 @@ define i32 @test1() { ; DarwinPIC: _test1: ; DarwinPIC: ldr r0, LCPI1_0 ; DarwinPIC: LPC1_0: -; DarwinPIC: ldr r0, [pc, +r0] +; DarwinPIC: ldr r0, [pc, r0] ; DarwinPIC: ldr r0, [r0] ; DarwinPIC: bx lr @@ -63,7 +63,7 @@ define i32 @test1() { ; LinuxPIC: .LPC1_0: ; LinuxPIC: add r0, pc, r0 -; LinuxPIC: ldr r0, [r1, +r0] +; LinuxPIC: ldr r0, [r1, r0] ; LinuxPIC: ldr r0, [r0] ; LinuxPIC: bx lr diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll index 623f2cb1dfab..8677ce535971 100644 --- a/test/CodeGen/ARM/ifcvt5.ll +++ b/test/CodeGen/ARM/ifcvt5.ll @@ -11,7 +11,7 @@ entry: define void @t1(i32 %a, i32 %b) { ; CHECK: t1: -; CHECK: ldmfdlt sp!, {r7, pc} +; CHECK: ldmialt sp!, {r7, pc} entry: %tmp1 = icmp sgt i32 %a, 10 ; [#uses=1] br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll index d7fcf7d64408..342208b6b5e2 100644 --- a/test/CodeGen/ARM/ifcvt6.ll +++ b/test/CodeGen/ARM/ifcvt6.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \ ; RUN: grep cmpne | count 1 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \ -; RUN: grep 
ldmfdhi | count 1 +; RUN: grep ldmiahi | count 1 define void @foo(i32 %X, i32 %Y) { entry: diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll index c60ad93699f1..eb97085ac004 100644 --- a/test/CodeGen/ARM/ifcvt7.ll +++ b/test/CodeGen/ARM/ifcvt7.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \ ; RUN: grep moveq | count 1 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \ -; RUN: grep ldmfdeq | count 1 +; RUN: grep ldmiaeq | count 1 ; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1. %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll index a7da834f7810..1e39060e69f2 100644 --- a/test/CodeGen/ARM/ifcvt8.ll +++ b/test/CodeGen/ARM/ifcvt8.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \ -; RUN: grep ldmfdne | count 1 +; RUN: grep ldmiane | count 1 %struct.SString = type { i8*, i32, i32 } diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll index 1a016a0942d2..9a2dc82d1894 100644 --- a/test/CodeGen/ARM/ldm.ll +++ b/test/CodeGen/ARM/ldm.ll @@ -24,7 +24,7 @@ define i32 @t2() { define i32 @t3() { ; CHECK: t3: ; CHECK: ldmib -; CHECK: ldmfd sp! +; CHECK: ldmia sp! %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; [#uses=1] %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; [#uses=1] %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; [#uses=1] diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll index c366e2dca5fb..895562a1d31e 100644 --- a/test/CodeGen/ARM/ldrd.ll +++ b/test/CodeGen/ARM/ldrd.ll @@ -10,10 +10,10 @@ entry: ;V6: ldrd r2, [r2] ;V5: ldr r3, [r2] -;V5: ldr r2, [r2, #+4] +;V5: ldr r2, [r2, #4] ;EABI: ldr r3, [r2] -;EABI: ldr r2, [r2, #+4] +;EABI: ldr r2, [r2, #4] %0 = load i64** @b, align 4 %1 = load i64* %0, align 4 diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll index f8d3df29c408..553cd64fce94 100644 --- a/test/CodeGen/ARM/str_pre-2.ll +++ b/test/CodeGen/ARM/str_pre-2.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm-linux-gnu | grep {str.*\\!} -; RUN: llc < %s -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #+4} +; RUN: llc < %s -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #4} @b = external global i64* diff --git a/test/CodeGen/ARM/tls2.ll b/test/CodeGen/ARM/tls2.ll index d932f90e4c10..57370c4de1c2 100644 --- a/test/CodeGen/ARM/tls2.ll +++ b/test/CodeGen/ARM/tls2.ll @@ -7,7 +7,7 @@ define i32 @f() { ; CHECK-NONPIC: f: -; CHECK-NONPIC: ldr {{r.}}, [pc, +{{r.}}] +; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}] ; CHECK-NONPIC: i(gottpoff) ; CHECK-PIC: f: ; CHECK-PIC: __tls_get_addr @@ -18,7 +18,7 @@ entry: define i32* @g() { ; CHECK-NONPIC: g: -; CHECK-NONPIC: ldr {{r.}}, [pc, +{{r.}}] +; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}] ; CHECK-NONPIC: i(gottpoff) ; CHECK-PIC: g: ; CHECK-PIC: __tls_get_addr diff --git a/test/CodeGen/Generic/dbg_value.ll b/test/CodeGen/Generic/dbg_value.ll new file mode 100644 index 000000000000..ce3364d45ed8 --- /dev/null +++ b/test/CodeGen/Generic/dbg_value.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s +; rdar://7759395 + +%0 = type { i32, i32 } + +define void @t(%0*, i32, i32, i32, i32) nounwind { + tail call void @llvm.dbg.value(metadata !{%0* %0}, i64 0, metadata !0) + unreachable +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!0 = metadata !{i32 0} ; diff --git 
a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll index b4b6ed9ff395..bfb7f6eabc7f 100644 --- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll +++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll @@ -12,10 +12,10 @@ define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string,std::allocator >"* %this, %"struct.std::basic_string,std::allocator >"* %__str) { ; CHECK: _ZNKSs7compareERKSs: ; CHECK: it ne -; CHECK-NEXT: ldmfdne.w +; CHECK-NEXT: ldmiane.w ; CHECK-NEXT: itt eq ; CHECK-NEXT: subeq.w -; CHECK-NEXT: ldmfdeq.w +; CHECK-NEXT: ldmiaeq.w entry: %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string,std::allocator >"* %this) ; [#uses=3] %1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string,std::allocator >"* %__str) ; [#uses=3] diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll index fe0e506c6c62..97295341858c 100644 --- a/test/CodeGen/Thumb2/large-stack.ll +++ b/test/CodeGen/Thumb2/large-stack.ll @@ -27,7 +27,7 @@ define i32 @test3() { ; DARWIN: sub.w sp, sp, #805306368 ; DARWIN: sub sp, #20 ; LINUX: test3: -; LINUX: stmfd sp!, {r4, r7, r11, lr} +; LINUX: stmdb sp!, {r4, r7, r11, lr} ; LINUX: sub.w sp, sp, #805306368 ; LINUX: sub sp, #16 %retval = alloca i32, align 4 diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll index f007b5c69720..55cdac983b3d 100644 --- a/test/CodeGen/Thumb2/ldr-str-imm12.ll +++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll @@ -22,7 +22,7 @@ define arm_apcscc %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind { entry: -; CHECK: ldr.w r9, [r7, #+28] +; CHECK: ldr.w r9, [r7, #28] %xgaps.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] %ycomp.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] br i1 false, label %bb, label %bb20 @@ -50,9 +50,9 @@ bb119: ; preds = %bb20, %bb20 bb420: ; preds = %bb20, %bb20 ; CHECK: bb420 ; CHECK: str r{{[0-7]}}, [sp] -; CHECK: str r{{[0-7]}}, [sp, #+4] -; CHECK: str r{{[0-7]}}, [sp, #+8] -; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #+24] +; CHECK: str r{{[0-7]}}, [sp, #4] +; CHECK: str r{{[0-7]}}, [sp, #8] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #24] store %union.rec* null, %union.rec** @zz_hold, align 4 store %union.rec* null, %union.rec** @zz_res, align 4 store %union.rec* %x, %union.rec** @zz_hold, align 4 diff --git a/test/CodeGen/Thumb2/thumb2-ldr.ll b/test/CodeGen/Thumb2/thumb2-ldr.ll index 94888fd94050..88434f1c7d80 100644 --- a/test/CodeGen/Thumb2/thumb2-ldr.ll +++ b/test/CodeGen/Thumb2/thumb2-ldr.ll @@ -11,7 +11,7 @@ entry: define i32 @f2(i32* %v) { entry: ; CHECK: f2: -; CHECK: ldr.w r0, [r0, #+4092] +; CHECK: ldr.w r0, [r0, #4092] %tmp2 = getelementptr i32* %v, i32 1023 %tmp = load i32* %tmp2 ret i32 %tmp diff --git a/test/CodeGen/Thumb2/thumb2-ldrh.ll b/test/CodeGen/Thumb2/thumb2-ldrh.ll index f1fb79c35ed0..fee97bf68913 100644 --- a/test/CodeGen/Thumb2/thumb2-ldrh.ll +++ b/test/CodeGen/Thumb2/thumb2-ldrh.ll @@ -11,7 +11,7 @@ entry: define i16 @f2(i16* %v) { entry: ; CHECK: f2: -; CHECK: ldrh.w r0, [r0, #+2046] +; CHECK: ldrh.w r0, [r0, #2046] %tmp2 = getelementptr i16* %v, i16 1023 %tmp = load i16* %tmp2 ret i16 %tmp diff --git a/test/CodeGen/Thumb2/thumb2-str.ll b/test/CodeGen/Thumb2/thumb2-str.ll index 
3eeec8c3850f..11bb936d1e64 100644 --- a/test/CodeGen/Thumb2/thumb2-str.ll +++ b/test/CodeGen/Thumb2/thumb2-str.ll @@ -9,7 +9,7 @@ define i32 @f1(i32 %a, i32* %v) { define i32 @f2(i32 %a, i32* %v) { ; CHECK: f2: -; CHECK: str.w r0, [r1, #+4092] +; CHECK: str.w r0, [r1, #4092] %tmp2 = getelementptr i32* %v, i32 1023 store i32 %a, i32* %tmp2 ret i32 %a diff --git a/test/CodeGen/Thumb2/thumb2-str_pre.ll b/test/CodeGen/Thumb2/thumb2-str_pre.ll index 9af960bbebde..1e6616a91cc5 100644 --- a/test/CodeGen/Thumb2/thumb2-str_pre.ll +++ b/test/CodeGen/Thumb2/thumb2-str_pre.ll @@ -2,7 +2,7 @@ define void @test1(i32* %X, i32* %A, i32** %dest) { ; CHECK: test1 -; CHECK: str r1, [r0, #+16]! +; CHECK: str r1, [r0, #16]! %B = load i32* %A ; [#uses=1] %Y = getelementptr i32* %X, i32 4 ; [#uses=2] store i32 %B, i32* %Y @@ -12,7 +12,7 @@ define void @test1(i32* %X, i32* %A, i32** %dest) { define i16* @test2(i16* %X, i32* %A) { ; CHECK: test2 -; CHECK: strh r1, [r0, #+8]! +; CHECK: strh r1, [r0, #8]! %B = load i32* %A ; [#uses=1] %Y = getelementptr i16* %X, i32 4 ; [#uses=2] %tmp = trunc i32 %B to i16 ; [#uses=1] diff --git a/test/CodeGen/Thumb2/thumb2-strb.ll b/test/CodeGen/Thumb2/thumb2-strb.ll index 1ebb938b1a88..7978e7fa918e 100644 --- a/test/CodeGen/Thumb2/thumb2-strb.ll +++ b/test/CodeGen/Thumb2/thumb2-strb.ll @@ -9,7 +9,7 @@ define i8 @f1(i8 %a, i8* %v) { define i8 @f2(i8 %a, i8* %v) { ; CHECK: f2: -; CHECK: strb.w r0, [r1, #+4092] +; CHECK: strb.w r0, [r1, #4092] %tmp2 = getelementptr i8* %v, i32 4092 store i8 %a, i8* %tmp2 ret i8 %a diff --git a/test/CodeGen/Thumb2/thumb2-strh.ll b/test/CodeGen/Thumb2/thumb2-strh.ll index b0eb8c12f594..97110a726f47 100644 --- a/test/CodeGen/Thumb2/thumb2-strh.ll +++ b/test/CodeGen/Thumb2/thumb2-strh.ll @@ -9,7 +9,7 @@ define i16 @f1(i16 %a, i16* %v) { define i16 @f2(i16 %a, i16* %v) { ; CHECK: f2: -; CHECK: strh.w r0, [r1, #+4092] +; CHECK: strh.w r0, [r1, #4092] %tmp2 = getelementptr i16* %v, i32 2046 store i16 %a, i16* %tmp2 ret i16 %a diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll index 317ed0a4f7d3..58e186b4c4f8 100644 --- a/test/CodeGen/X86/2007-01-08-InstrSched.ll +++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll @@ -11,12 +11,12 @@ define float @foo(float %x) nounwind { %tmp14 = fadd float %tmp12, %tmp7 ret float %tmp14 -; CHECK: mulss LCPI1_3(%rip) -; CHECK-NEXT: mulss LCPI1_0(%rip) -; CHECK-NEXT: mulss LCPI1_1(%rip) -; CHECK-NEXT: mulss LCPI1_2(%rip) -; CHECK-NEXT: addss -; CHECK-NEXT: addss -; CHECK-NEXT: addss -; CHECK-NEXT: ret +; CHECK: mulss LCPI1_0(%rip) +; CHECK: mulss LCPI1_1(%rip) +; CHECK: addss +; CHECK: mulss LCPI1_2(%rip) +; CHECK: addss +; CHECK: mulss LCPI1_3(%rip) +; CHECK: addss +; CHECK: ret } diff --git a/test/CodeGen/X86/2010-03-17-ISelBug.ll b/test/CodeGen/X86/2010-03-17-ISelBug.ll new file mode 100644 index 000000000000..609b4e24900b --- /dev/null +++ b/test/CodeGen/X86/2010-03-17-ISelBug.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin5 +; rdar://7761790 + +%"struct..0$_485" = type { i16, i16, i32 } +%union.PPToken = type { %"struct..0$_485" } +%struct.PPOperation = type { %union.PPToken, %union.PPToken, [6 x %union.PPToken], i32, i32, i32, [1 x i32], [0 x i8] } + +define i32* @t() align 2 nounwind { +entry: + %operation = alloca %struct.PPOperation, align 8 ; <%struct.PPOperation*> [#uses=2] + %0 = load i32*** null, align 4 ; [#uses=1] + %1 = ptrtoint i32** %0 to i32 ; [#uses=1] + %2 = sub nsw i32 %1, undef ; [#uses=2] + br i1 false, label %bb20, label %bb.nph380 + +bb20: 
; preds = %entry + ret i32* null + +bb.nph380: ; preds = %entry + %scevgep403 = getelementptr %struct.PPOperation* %operation, i32 0, i32 1, i32 0, i32 2 ; [#uses=1] + %3 = ashr i32 %2, 1 ; [#uses=1] + %tmp405 = and i32 %3, -2 ; [#uses=1] + %scevgep408 = getelementptr %struct.PPOperation* %operation, i32 0, i32 1, i32 0, i32 1 ; [#uses=1] + %tmp410 = and i32 %2, -4 ; [#uses=1] + br label %bb169 + +bb169: ; preds = %bb169, %bb.nph380 + %index.6379 = phi i32 [ 0, %bb.nph380 ], [ %4, %bb169 ] ; [#uses=3] + %tmp404 = mul i32 %index.6379, -2 ; [#uses=1] + %tmp406 = add i32 %tmp405, %tmp404 ; [#uses=1] + %scevgep407 = getelementptr i32* %scevgep403, i32 %tmp406 ; [#uses=1] + %tmp409 = mul i32 %index.6379, -4 ; [#uses=1] + %tmp411 = add i32 %tmp410, %tmp409 ; [#uses=1] + %scevgep412 = getelementptr i16* %scevgep408, i32 %tmp411 ; [#uses=1] + store i16 undef, i16* %scevgep412, align 2 + store i32 undef, i32* %scevgep407, align 4 + %4 = add nsw i32 %index.6379, 1 ; [#uses=1] + br label %bb169 +} diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll index 07198386b8ea..3e730de0a8e5 100644 --- a/test/CodeGen/X86/gather-addresses.ll +++ b/test/CodeGen/X86/gather-addresses.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=x86-64 < %s | FileCheck %s +; rdar://7398554 ; When doing vector gather-scatter index calculation with 32-bit indices, ; bounce the vector off of cache rather than shuffling each individual diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll index 2f6fb3fa8be1..ab71555950b2 100644 --- a/test/CodeGen/X86/lsr-reuse.ll +++ b/test/CodeGen/X86/lsr-reuse.ll @@ -8,10 +8,10 @@ target triple = "x86_64-unknown-unknown" ; CHECK: full_me_0: ; CHECK: movsd (%rsi), %xmm0 -; CHECK: addq $8, %rsi ; CHECK: mulsd (%rdx), %xmm0 -; CHECK: addq $8, %rdx ; CHECK: movsd %xmm0, (%rdi) +; CHECK: addq $8, %rsi +; CHECK: addq $8, %rdx ; CHECK: addq $8, %rdi ; CHECK: decq %rcx ; CHECK: jne @@ -53,10 +53,10 @@ return: ; CHECK: mulsd -2048(%rdx), %xmm0 ; CHECK: movsd %xmm0, -2048(%rdi) ; CHECK: movsd (%rsi), %xmm0 -; CHECK: addq $8, %rsi ; CHECK: divsd (%rdx), %xmm0 -; CHECK: addq $8, %rdx ; CHECK: movsd %xmm0, (%rdi) +; CHECK: addq $8, %rsi +; CHECK: addq $8, %rdx ; CHECK: addq $8, %rdi ; CHECK: decq %rcx ; CHECK: jne @@ -99,10 +99,10 @@ return: ; CHECK: mulsd (%rdx), %xmm0 ; CHECK: movsd %xmm0, (%rdi) ; CHECK: movsd -2048(%rsi), %xmm0 -; CHECK: addq $8, %rsi ; CHECK: divsd -2048(%rdx), %xmm0 -; CHECK: addq $8, %rdx ; CHECK: movsd %xmm0, -2048(%rdi) +; CHECK: addq $8, %rsi +; CHECK: addq $8, %rdx ; CHECK: addq $8, %rdi ; CHECK: decq %rcx ; CHECK: jne @@ -144,10 +144,10 @@ return: ; CHECK: mulsd (%rdx), %xmm0 ; CHECK: movsd %xmm0, (%rdi) ; CHECK: movsd -4096(%rsi), %xmm0 -; CHECK: addq $8, %rsi ; CHECK: divsd -4096(%rdx), %xmm0 -; CHECK: addq $8, %rdx ; CHECK: movsd %xmm0, -4096(%rdi) +; CHECK: addq $8, %rsi +; CHECK: addq $8, %rdx ; CHECK: addq $8, %rdi ; CHECK: decq %rcx ; CHECK: jne @@ -310,10 +310,10 @@ return: ; CHECK: addsd (%rsi), %xmm0 ; CHECK: movsd %xmm0, (%rdx) ; CHECK: movsd 40(%rdi), %xmm0 -; CHECK: addq $8, %rdi ; CHECK: subsd 40(%rsi), %xmm0 -; CHECK: addq $8, %rsi ; CHECK: movsd %xmm0, 40(%rdx) +; CHECK: addq $8, %rdi +; CHECK: addq $8, %rsi ; CHECK: addq $8, %rdx ; CHECK: decq %rcx ; CHECK: jne diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll index 90315fd2f267..ce35b4545184 100644 --- a/test/CodeGen/X86/sibcall.ll +++ b/test/CodeGen/X86/sibcall.ll @@ -57,11 +57,11 @@ define void @t5(void ()* nocapture %x) nounwind ssp { entry: ; 
32: t5: ; 32-NOT: call -; 32: jmpl * +; 32: jmpl *4(%esp) ; 64: t5: ; 64-NOT: call -; 64: jmpq * +; 64: jmpq *%rdi tail call void %x() nounwind ret void } @@ -215,4 +215,59 @@ entry: ret %struct.ns* %0 } +; rdar://6195379 +; llvm can't do sibcall for this in 32-bit mode (yet). declare fastcc %struct.ns* @foo7(%struct.cp* byval align 4, i8 signext) nounwind ssp + +%struct.__block_descriptor = type { i64, i64 } +%struct.__block_descriptor_withcopydispose = type { i64, i64, i8*, i8* } +%struct.__block_literal_1 = type { i8*, i32, i32, i8*, %struct.__block_descriptor* } +%struct.__block_literal_2 = type { i8*, i32, i32, i8*, %struct.__block_descriptor_withcopydispose*, void ()* } + +define void @t14(%struct.__block_literal_2* nocapture %.block_descriptor) nounwind ssp { +entry: +; 64: t14: +; 64: movq 32(%rdi) +; 64-NOT: movq 16(%rdi) +; 64: jmpq *16(%rdi) + %0 = getelementptr inbounds %struct.__block_literal_2* %.block_descriptor, i64 0, i32 5 ; [#uses=1] + %1 = load void ()** %0, align 8 ; [#uses=2] + %2 = bitcast void ()* %1 to %struct.__block_literal_1* ; <%struct.__block_literal_1*> [#uses=1] + %3 = getelementptr inbounds %struct.__block_literal_1* %2, i64 0, i32 3 ; [#uses=1] + %4 = load i8** %3, align 8 ; [#uses=1] + %5 = bitcast i8* %4 to void (i8*)* ; [#uses=1] + %6 = bitcast void ()* %1 to i8* ; [#uses=1] + tail call void %5(i8* %6) nounwind + ret void +} + +; rdar://7726868 +%struct.foo = type { [4 x i32] } + +define void @t15(%struct.foo* noalias sret %agg.result) nounwind { +; 32: t15: +; 32: call {{_?}}f +; 32: ret $4 + +; 64: t15: +; 64: callq {{_?}}f +; 64: ret + tail call fastcc void @f(%struct.foo* noalias sret %agg.result) nounwind + ret void +} + +declare void @f(%struct.foo* noalias sret) nounwind + +define void @t16() nounwind ssp { +entry: +; 32: t16: +; 32: call {{_?}}bar4 +; 32: fstp + +; 64: t16: +; 64: jmp {{_?}}bar4 + %0 = tail call double @bar4() nounwind + ret void +} + +declare double @bar4() diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll index f2b8010d4138..20b8eac9c8d8 100644 --- a/test/CodeGen/X86/sse2.ll +++ b/test/CodeGen/X86/sse2.ll @@ -10,10 +10,10 @@ define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; CHECK: t1: ; CHECK: movl 8(%esp), %eax -; CHECK-NEXT: movl 4(%esp), %ecx ; CHECK-NEXT: movapd (%eax), %xmm0 ; CHECK-NEXT: movlpd 12(%esp), %xmm0 -; CHECK-NEXT: movapd %xmm0, (%ecx) +; CHECK-NEXT: movl 4(%esp), %eax +; CHECK-NEXT: movapd %xmm0, (%eax) ; CHECK-NEXT: ret } @@ -26,9 +26,9 @@ define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; CHECK: t2: ; CHECK: movl 8(%esp), %eax -; CHECK-NEXT: movl 4(%esp), %ecx ; CHECK-NEXT: movapd (%eax), %xmm0 ; CHECK-NEXT: movhpd 12(%esp), %xmm0 -; CHECK-NEXT: movapd %xmm0, (%ecx) +; CHECK-NEXT: movl 4(%esp), %eax +; CHECK-NEXT: movapd %xmm0, (%eax) ; CHECK-NEXT: ret } diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll index 921161e4a122..e9c2c01a9e44 100644 --- a/test/CodeGen/X86/sse3.ll +++ b/test/CodeGen/X86/sse3.ll @@ -17,8 +17,8 @@ entry: ; X64: t0: ; X64: movddup (%rsi), %xmm0 -; X64: xorl %eax, %eax ; X64: pshuflw $0, %xmm0, %xmm0 +; X64: xorl %eax, %eax ; X64: pinsrw $0, %eax, %xmm0 ; X64: movaps %xmm0, (%rdi) ; X64: ret @@ -169,11 +169,11 @@ define internal void @t10() nounwind { ret void ; X64: t10: ; X64: pextrw $4, %xmm0, %eax -; X64: pextrw $6, %xmm0, %edx ; X64: movlhps %xmm1, %xmm1 ; X64: pshuflw $8, %xmm1, %xmm1 ; X64: pinsrw $2, %eax, %xmm1 -; X64: pinsrw $3, %edx, %xmm1 +; X64: pextrw $6, %xmm0, %eax +; X64: pinsrw $3, %eax, 
%xmm1 } @@ -184,8 +184,8 @@ entry: ret <8 x i16> %tmp7 ; X64: t11: -; X64: movlhps %xmm0, %xmm0 ; X64: movd %xmm1, %eax +; X64: movlhps %xmm0, %xmm0 ; X64: pshuflw $1, %xmm0, %xmm0 ; X64: pinsrw $1, %eax, %xmm0 ; X64: ret @@ -198,8 +198,8 @@ entry: ret <8 x i16> %tmp9 ; X64: t12: -; X64: movlhps %xmm0, %xmm0 ; X64: pextrw $3, %xmm1, %eax +; X64: movlhps %xmm0, %xmm0 ; X64: pshufhw $3, %xmm0, %xmm0 ; X64: pinsrw $5, %eax, %xmm0 ; X64: ret diff --git a/test/CodeGen/X86/sse42.ll b/test/CodeGen/X86/sse42.ll index c9c4d012102a..1723909a4b49 100644 --- a/test/CodeGen/X86/sse42.ll +++ b/test/CodeGen/X86/sse42.ll @@ -9,10 +9,10 @@ define i32 @crc32_8(i32 %a, i8 %b) nounwind { %tmp = call i32 @llvm.x86.sse42.crc32.8(i32 %a, i8 %b) ret i32 %tmp ; X32: _crc32_8: -; X32: crc32 8(%esp), %eax +; X32: crc32b 8(%esp), %eax ; X64: _crc32_8: -; X64: crc32 %sil, %eax +; X64: crc32b %sil, %eax } @@ -20,10 +20,10 @@ define i32 @crc32_16(i32 %a, i16 %b) nounwind { %tmp = call i32 @llvm.x86.sse42.crc32.16(i32 %a, i16 %b) ret i32 %tmp ; X32: _crc32_16: -; X32: crc32 8(%esp), %eax +; X32: crc32w 8(%esp), %eax ; X64: _crc32_16: -; X64: crc32 %si, %eax +; X64: crc32w %si, %eax } @@ -31,8 +31,8 @@ define i32 @crc32_32(i32 %a, i32 %b) nounwind { %tmp = call i32 @llvm.x86.sse42.crc32.32(i32 %a, i32 %b) ret i32 %tmp ; X32: _crc32_32: -; X32: crc32 8(%esp), %eax +; X32: crc32l 8(%esp), %eax ; X64: _crc32_32: -; X64: crc32 %esi, %eax +; X64: crc32l %esi, %eax } diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll index 11383fa30829..58b557a7915c 100644 --- a/test/CodeGen/X86/widen_load-2.ll +++ b/test/CodeGen/X86/widen_load-2.ll @@ -152,4 +152,28 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp %x = add %i8vec31 %a, %b store %i8vec31 %x, %i8vec31* %ret, align 16 ret void -} \ No newline at end of file +} + + +%i8vec3pack = type { <3 x i8>, i8 } +define %i8vec3pack @rot() nounwind { +; CHECK: shrb +entry: + %X = alloca %i8vec3pack, align 4 + %rot = alloca %i8vec3pack, align 4 + %result = alloca %i8vec3pack, align 4 + %storetmp = bitcast %i8vec3pack* %X to <3 x i8>* + store <3 x i8> , <3 x i8>* %storetmp + %storetmp1 = bitcast %i8vec3pack* %rot to <3 x i8>* + store <3 x i8> , <3 x i8>* %storetmp1 + %tmp = load %i8vec3pack* %X + %extractVec = extractvalue %i8vec3pack %tmp, 0 + %tmp2 = load %i8vec3pack* %rot + %extractVec3 = extractvalue %i8vec3pack %tmp2, 0 + %shr = lshr <3 x i8> %extractVec, %extractVec3 + %storetmp4 = bitcast %i8vec3pack* %result to <3 x i8>* + store <3 x i8> %shr, <3 x i8>* %storetmp4 + %tmp5 = load %i8vec3pack* %result + ret %i8vec3pack %tmp5 +} + diff --git a/test/DebugInfo/2010-03-19-DbgDeclare.ll b/test/DebugInfo/2010-03-19-DbgDeclare.ll new file mode 100644 index 000000000000..1f7a889c7d70 --- /dev/null +++ b/test/DebugInfo/2010-03-19-DbgDeclare.ll @@ -0,0 +1,12 @@ +; RUN: llvm-as < %s | opt -verify -disable-output + +define void @Foo(i32 %a, i32 %b) { +entry: + call void @llvm.dbg.declare(metadata !{i32* null}, metadata !1) + ret void +} + +!0 = metadata !{i32 662302, i32 26, metadata !1, null} +!1 = metadata !{i32 4, metadata !"foo"} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone diff --git a/test/FrontendObjC/2010-03-17-StructRef.m b/test/FrontendObjC/2010-03-17-StructRef.m new file mode 100644 index 000000000000..a8a509c2e98a --- /dev/null +++ b/test/FrontendObjC/2010-03-17-StructRef.m @@ -0,0 +1,43 @@ +// RUN: %llvmgcc %s -S -o - | FileCheck %s +// Bitfield references must not touch memory outside of the 
enclosing +// struct. Radar 7639995 +typedef signed char BOOL; +@protocol NSObject +- (id)init; +@end +@interface NSObject {} +@end +@interface IMAVChatParticipant : NSObject { + int _ardRole; + int _state; + int _avRelayStatus; + int _chatEndedReason; + int _chatError; + unsigned _sendingAudio:1; + unsigned _sendingVideo:1; + unsigned _sendingAuxVideo:1; + unsigned _audioMuted:1; + unsigned _videoPaused:1; + unsigned _networkStalled:1; + unsigned _isInitiator:1; + unsigned _isAOLInterop:1; + unsigned _isRecording:1; + unsigned _isUsingICE:1; +} +@end +@implementation IMAVChatParticipant +- (id) init { + self = [super init]; + if ( self ) { + BOOL blah = (BOOL)1; + // We're expecting these three bitfield assignments will generate i8 stores. + _sendingAudio = (BOOL)1; + _isUsingICE = (BOOL)1; + _isUsingICE = blah; + // CHECK: store i8 + // CHECK: store i8 + // CHECK: store i8 + } + return self; +} +@end diff --git a/test/LLVMC/AppendCmdHook.td b/test/LLVMC/AppendCmdHook.td index 539a93f8349b..254d5eaf37af 100644 --- a/test/LLVMC/AppendCmdHook.td +++ b/test/LLVMC/AppendCmdHook.td @@ -2,6 +2,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/C++/dash-x.cpp b/test/LLVMC/C++/dash-x.cpp index b32400e0b039..7d4cf19fa41d 100644 --- a/test/LLVMC/C++/dash-x.cpp +++ b/test/LLVMC/C++/dash-x.cpp @@ -1,6 +1,7 @@ // Test that we can compile .c files as C++ and vice versa // RUN: llvmc %s -x c++ %p/../test_data/false.c -x c %p/../test_data/false.cpp -x lisp -x whatnot -x none %p/../test_data/false2.cpp -o %t // RUN: %abs_tmp | grep hello +// XFAIL: vg extern int test_main(); diff --git a/test/LLVMC/C++/hello.cpp b/test/LLVMC/C++/hello.cpp index b9c6399ebfc1..8f38306e9e96 100644 --- a/test/LLVMC/C++/hello.cpp +++ b/test/LLVMC/C++/hello.cpp @@ -1,6 +1,7 @@ // Test that we can compile C++ code. // RUN: llvmc %s -o %t // RUN: %abs_tmp | grep hello +// XFAIL: vg #include int main() { diff --git a/test/LLVMC/C++/together.cpp b/test/LLVMC/C++/together.cpp index e02f69aec8d7..925215a4db51 100644 --- a/test/LLVMC/C++/together.cpp +++ b/test/LLVMC/C++/together.cpp @@ -1,6 +1,7 @@ // Check that we can compile files of different types together. // RUN: llvmc %s %p/../test_data/together.c -o %t // RUN: %abs_tmp | grep hello +// XFAIL: vg extern "C" void test(); diff --git a/test/LLVMC/C/emit-llvm.c b/test/LLVMC/C/emit-llvm.c index 38bbba6f0afc..9844bc757cc6 100644 --- a/test/LLVMC/C/emit-llvm.c +++ b/test/LLVMC/C/emit-llvm.c @@ -1,4 +1,5 @@ // RUN: llvmc -c -emit-llvm -o - %s | llvm-dis | grep "@f0()" | count 1 +// XFAIL: vg_leak int f0(void) { } diff --git a/test/LLVMC/C/hello.c b/test/LLVMC/C/hello.c index b2d903f8d53f..29ad39fd2cb6 100644 --- a/test/LLVMC/C/hello.c +++ b/test/LLVMC/C/hello.c @@ -2,6 +2,7 @@ * Check that we can compile helloworld * RUN: llvmc %s -o %t * RUN: %abs_tmp | grep hello + * XFAIL: vg_leak */ #include diff --git a/test/LLVMC/C/include.c b/test/LLVMC/C/include.c index 07ae761d2c88..9c9530bfb49f 100644 --- a/test/LLVMC/C/include.c +++ b/test/LLVMC/C/include.c @@ -2,6 +2,7 @@ * Check that the 'include' options work. 
* RUN: echo "int x;\n" > %t1.inc * RUN: llvmc -include %t1.inc -fsyntax-only %s + * XFAIL: vg_leak */ int f0(void) { diff --git a/test/LLVMC/C/opt-test.c b/test/LLVMC/C/opt-test.c index d69dc9b479f8..7924def203ab 100644 --- a/test/LLVMC/C/opt-test.c +++ b/test/LLVMC/C/opt-test.c @@ -2,6 +2,7 @@ * Check that the -opt switch works. * RUN: llvmc %s -opt -o %t * RUN: %abs_tmp | grep hello + * XFAIL: vg_leak */ #include diff --git a/test/LLVMC/C/sink.c b/test/LLVMC/C/sink.c index bdff340da903..c4f9beba8c38 100644 --- a/test/LLVMC/C/sink.c +++ b/test/LLVMC/C/sink.c @@ -2,6 +2,7 @@ * Check that the 'sink' options work. * RUN: llvmc -v -Wall %s -o %t |& grep "Wall" * RUN: %abs_tmp | grep hello + * XFAIL: vg_leak */ #include diff --git a/test/LLVMC/C/wall.c b/test/LLVMC/C/wall.c index f6760990b881..36813ba0f833 100644 --- a/test/LLVMC/C/wall.c +++ b/test/LLVMC/C/wall.c @@ -2,6 +2,7 @@ * Check that -Wall works as intended * RUN: llvmc -Wall %s -o %t * RUN: %abs_tmp | grep hello + * XFAIL: vg_leak */ #include diff --git a/test/LLVMC/EmptyCompilationGraph.td b/test/LLVMC/EmptyCompilationGraph.td index 934905b15e99..e5d5e9a64cdb 100644 --- a/test/LLVMC/EmptyCompilationGraph.td +++ b/test/LLVMC/EmptyCompilationGraph.td @@ -1,6 +1,7 @@ // Check that the compilation graph can be empty. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/EnvParentheses.td b/test/LLVMC/EnvParentheses.td index c563171335d6..86091db9bdfb 100644 --- a/test/LLVMC/EnvParentheses.td +++ b/test/LLVMC/EnvParentheses.td @@ -3,6 +3,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: not grep {FOO")));} %t // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/ExternOptions.td b/test/LLVMC/ExternOptions.td index 77cb4bf65269..d84ea847bf12 100644 --- a/test/LLVMC/ExternOptions.td +++ b/test/LLVMC/ExternOptions.td @@ -3,6 +3,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/ForwardAs.td b/test/LLVMC/ForwardAs.td index 7c3bd1798a2b..536b96a9758f 100644 --- a/test/LLVMC/ForwardAs.td +++ b/test/LLVMC/ForwardAs.td @@ -3,6 +3,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/ForwardTransformedValue.td b/test/LLVMC/ForwardTransformedValue.td index 2caef6cd5393..5e0bf290d1fd 100644 --- a/test/LLVMC/ForwardTransformedValue.td +++ b/test/LLVMC/ForwardTransformedValue.td @@ -3,6 +3,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/ForwardValue.td b/test/LLVMC/ForwardValue.td index 463235c61117..4c7a0ee0ec5e 100644 --- a/test/LLVMC/ForwardValue.td +++ b/test/LLVMC/ForwardValue.td @@ -3,6 +3,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/HookWithArguments.td b/test/LLVMC/HookWithArguments.td index 312fa9ccebbe..5ff96cd6a88d 100644 --- 
a/test/LLVMC/HookWithArguments.td +++ b/test/LLVMC/HookWithArguments.td @@ -2,6 +2,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/HookWithInFile.td b/test/LLVMC/HookWithInFile.td index f58e3f4288fb..9855dbc5bd9a 100644 --- a/test/LLVMC/HookWithInFile.td +++ b/test/LLVMC/HookWithInFile.td @@ -2,6 +2,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/Init.td b/test/LLVMC/Init.td index ff9a0d8d90f8..05209bf61aca 100644 --- a/test/LLVMC/Init.td +++ b/test/LLVMC/Init.td @@ -2,6 +2,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/MultiValuedOption.td b/test/LLVMC/MultiValuedOption.td index b52af57ade6a..73ccb6311f3c 100644 --- a/test/LLVMC/MultiValuedOption.td +++ b/test/LLVMC/MultiValuedOption.td @@ -3,6 +3,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/MultipleCompilationGraphs.td b/test/LLVMC/MultipleCompilationGraphs.td index 9702248b5729..86cd6131243a 100644 --- a/test/LLVMC/MultipleCompilationGraphs.td +++ b/test/LLVMC/MultipleCompilationGraphs.td @@ -1,6 +1,7 @@ // Check that multiple compilation graphs are allowed. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/NoActions.td b/test/LLVMC/NoActions.td index 015bfdd09739..a80bcfe6ce1c 100644 --- a/test/LLVMC/NoActions.td +++ b/test/LLVMC/NoActions.td @@ -2,6 +2,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/NoCompilationGraph.td b/test/LLVMC/NoCompilationGraph.td index 96c1f17e18e0..69df70133307 100644 --- a/test/LLVMC/NoCompilationGraph.td +++ b/test/LLVMC/NoCompilationGraph.td @@ -1,5 +1,6 @@ // Check that the compilation graph is not required. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/OneOrMore.td b/test/LLVMC/OneOrMore.td index 42ec693fbb7a..37fbc87fdfab 100644 --- a/test/LLVMC/OneOrMore.td +++ b/test/LLVMC/OneOrMore.td @@ -3,6 +3,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/OptionPreprocessor.td b/test/LLVMC/OptionPreprocessor.td index 8a314818a0a9..c2641be7e645 100644 --- a/test/LLVMC/OptionPreprocessor.td +++ b/test/LLVMC/OptionPreprocessor.td @@ -2,6 +2,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/LLVMC/TestWarnings.td b/test/LLVMC/TestWarnings.td index 9523e24702ea..0388cb0b0f1e 100644 --- a/test/LLVMC/TestWarnings.td +++ b/test/LLVMC/TestWarnings.td @@ -1,6 +1,7 @@ // Check that warnings about unused options are really emitted. // This should fail because the output is printed on stderr. -// RUN: ignore tblgen -I %p/../../include --gen-llvmc %s |& grep "option '-Wall' has no effect!" +// RUN: tblgen -I %p/../../include --gen-llvmc %s |& grep "option '-Wall' has no effect!" +// XFAIL: vg_leak include "llvm/CompilerDriver/Common.td" diff --git a/test/MC/AsmParser/X86/x86_32-bit_cat.s b/test/MC/AsmParser/X86/x86_32-bit_cat.s index 5429e8e23c64..e910c653e96c 100644 --- a/test/MC/AsmParser/X86/x86_32-bit_cat.s +++ b/test/MC/AsmParser/X86/x86_32-bit_cat.s @@ -7756,41 +7756,38 @@ // CHECK: ptest %xmm5, %xmm5 ptest %xmm5,%xmm5 -// CHECK: crc32 3735928559(%ebx,%ecx,8), %ecx - crc32 0xdeadbeef(%ebx,%ecx,8),%ecx +// CHECK: crc32b %bl, %eax + crc32b %bl, %eax -// CHECK: crc32 69, %ecx - crc32 0x45,%ecx +// CHECK: crc32b 4(%ebx), %eax + crc32b 4(%ebx), %eax -// CHECK: crc32 32493, %ecx - crc32 0x7eed,%ecx +// CHECK: crc32w %bx, %eax + crc32w %bx, %eax -// CHECK: crc32 3133065982, %ecx - crc32 0xbabecafe,%ecx +// CHECK: crc32w 4(%ebx), %eax + crc32w 4(%ebx), %eax -// CHECK: crc32 305419896, %ecx - crc32 0x12345678,%ecx +// CHECK: crc32l %ebx, %eax + crc32l %ebx, %eax -// CHECK: crc32 %ecx, %ecx - crc32 %ecx,%ecx +// CHECK: crc32l 4(%ebx), %eax + crc32l 4(%ebx), %eax -// CHECK: crc32 %ecx, %ecx - crc32 %ecx,%ecx +// CHECK: crc32l 3735928559(%ebx,%ecx,8), %ecx + crc32l 0xdeadbeef(%ebx,%ecx,8),%ecx -// CHECK: crc32 3735928559(%ebx,%ecx,8), %ecx - crc32 0xdeadbeef(%ebx,%ecx,8),%ecx +// CHECK: crc32l 69, %ecx + crc32l 0x45,%ecx -// CHECK: crc32 69, %ecx - crc32 0x45,%ecx +// CHECK: crc32l 32493, %ecx + crc32l 0x7eed,%ecx -// CHECK: crc32 32493, %ecx - crc32 0x7eed,%ecx +// CHECK: crc32l 3133065982, %ecx + crc32l 0xbabecafe,%ecx -// CHECK: crc32 3133065982, %ecx - crc32 0xbabecafe,%ecx - -// CHECK: crc32 305419896, %ecx - crc32 0x12345678,%ecx +// CHECK: crc32l %ecx, %ecx + crc32l %ecx,%ecx // CHECK: pcmpgtq 3735928559(%ebx,%ecx,8), %xmm5 pcmpgtq 0xdeadbeef(%ebx,%ecx,8),%xmm5 diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index 7dacc7508636..2088aa7bd551 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -9861,3 +9861,47 @@ // CHECK: pcmpgtq %xmm5, %xmm5 // CHECK: encoding: [0x66,0x0f,0x38,0x37,0xed] pcmpgtq %xmm5,%xmm5 + +// CHECK: crc32b %bl, %eax +// CHECK: 
encoding: [0xf2,0x0f,0x38,0xf0,0xc3] + crc32b %bl, %eax + +// CHECK: crc32b 4(%ebx), %eax +// CHECK: encoding: [0xf2,0x0f,0x38,0xf0,0x43,0x04] + crc32b 4(%ebx), %eax + +// CHECK: crc32w %bx, %eax +// CHECK: encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc3] + crc32w %bx, %eax + +// CHECK: crc32w 4(%ebx), %eax +// CHECK: encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x43,0x04] + crc32w 4(%ebx), %eax + +// CHECK: crc32l %ebx, %eax +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0xc3] + crc32l %ebx, %eax + +// CHECK: crc32l 4(%ebx), %eax +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0x43,0x04] + crc32l 4(%ebx), %eax + +// CHECK: crc32l 3735928559(%ebx,%ecx,8), %ecx +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0x8c,0xcb,0xef,0xbe,0xad,0xde] + crc32l 0xdeadbeef(%ebx,%ecx,8),%ecx + +// CHECK: crc32l 69, %ecx +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0x0d,0x45,0x00,0x00,0x00] + crc32l 0x45,%ecx + +// CHECK: crc32l 32493, %ecx +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0x0d,0xed,0x7e,0x00,0x00] + crc32l 0x7eed,%ecx + +// CHECK: crc32l 3133065982, %ecx +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0x0d,0xfe,0xca,0xbe,0xba] + crc32l 0xbabecafe,%ecx + +// CHECK: crc32l %ecx, %ecx +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0xc9] + crc32l %ecx,%ecx diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s new file mode 100644 index 000000000000..3920c5b5f224 --- /dev/null +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -0,0 +1,73 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: crc32b %bl, %eax +// CHECK: encoding: [0xf2,0x0f,0x38,0xf0,0xc3] + crc32b %bl, %eax + +// CHECK: crc32b 4(%rbx), %eax +// CHECK: encoding: [0xf2,0x0f,0x38,0xf0,0x43,0x04] + crc32b 4(%rbx), %eax + +// CHECK: crc32w %bx, %eax +// CHECK: encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc3] + crc32w %bx, %eax + +// CHECK: crc32w 4(%rbx), %eax +// CHECK: encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x43,0x04] + crc32w 4(%rbx), %eax + +// CHECK: crc32l %ebx, %eax +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0xc3] + crc32l %ebx, %eax + +// CHECK: crc32l 4(%rbx), %eax +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0x43,0x04] + crc32l 4(%rbx), %eax + +// CHECK: crc32l 3735928559(%rbx,%rcx,8), %ecx +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0x8c,0xcb,0xef,0xbe,0xad,0xde] + crc32l 0xdeadbeef(%rbx,%rcx,8),%ecx + +// CHECK: crc32l 69, %ecx +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0x0c,0x25,0x45,0x00,0x00,0x00] + crc32l 0x45,%ecx + +// CHECK: crc32l 32493, %ecx +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0x0c,0x25,0xed,0x7e,0x00,0x00] + crc32l 0x7eed,%ecx + +// CHECK: crc32l 3133065982, %ecx +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0x0c,0x25,0xfe,0xca,0xbe,0xba] + crc32l 0xbabecafe,%ecx + +// CHECK: crc32l %ecx, %ecx +// CHECK: encoding: [0xf2,0x0f,0x38,0xf1,0xc9] + crc32l %ecx,%ecx + +// CHECK: crc32b %r11b, %eax +// CHECK: encoding: [0xf2,0x41,0x0f,0x38,0xf0,0xc3] + crc32b %r11b, %eax + +// CHECK: crc32b 4(%rbx), %eax +// CHECK: encoding: [0xf2,0x0f,0x38,0xf0,0x43,0x04] + crc32b 4(%rbx), %eax + +// CHECK: crc32b %dil, %rax +// CHECK: encoding: [0xf2,0x48,0x0f,0x38,0xf0,0xc7] + crc32b %dil,%rax + +// CHECK: crc32b %r11b, %rax +// CHECK: encoding: [0xf2,0x49,0x0f,0x38,0xf0,0xc3] + crc32b %r11b,%rax + +// CHECK: crc32b 4(%rbx), %rax +// CHECK: encoding: [0xf2,0x48,0x0f,0x38,0xf0,0x43,0x04] + crc32b 4(%rbx), %rax + +// CHECK: crc32q %rbx, %rax +// CHECK: encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc3] + crc32q %rbx, %rax + +// CHECK: crc32q 4(%rbx), %rax +// CHECK: encoding: [0xf2,0x48,0x0f,0x38,0xf1,0x43,0x04] + crc32q 4(%rbx), 
%rax diff --git a/test/MC/AsmParser/X86/x86_64-incl_decl.s b/test/MC/AsmParser/X86/x86_64-incl_decl.s new file mode 100644 index 000000000000..51315f83f2cc --- /dev/null +++ b/test/MC/AsmParser/X86/x86_64-incl_decl.s @@ -0,0 +1,26 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck --check-prefix=CHECK-X86_32 %s +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck --check-prefix=CHECK-X86_64 %s + +# CHECK-X86_32: incb %al # encoding: [0xfe,0xc0] +# CHECK-X86_64: incb %al # encoding: [0xfe,0xc0] + incb %al + +# CHECK-X86_32: incw %ax # encoding: [0x66,0x40] +# CHECK-X86_64: incw %ax # encoding: [0x66,0xff,0xc0] + incw %ax + +# CHECK-X86_32: incl %eax # encoding: [0x40] +# CHECK-X86_64: incl %eax # encoding: [0xff,0xc0] + incl %eax + +# CHECK-X86_32: decb %al # encoding: [0xfe,0xc8] +# CHECK-X86_64: decb %al # encoding: [0xfe,0xc8] + decb %al + +# CHECK-X86_32: decw %ax # encoding: [0x66,0x48] +# CHECK-X86_64: decw %ax # encoding: [0x66,0xff,0xc8] + decw %ax + +# CHECK-X86_32: decl %eax # encoding: [0x48] +# CHECK-X86_64: decl %eax # encoding: [0xff,0xc8] + decl %eax diff --git a/test/MC/AsmParser/X86/x86_64-new-encoder.s b/test/MC/AsmParser/X86/x86_64-new-encoder.s index 797558a81ef9..4028beea7431 100644 --- a/test/MC/AsmParser/X86/x86_64-new-encoder.s +++ b/test/MC/AsmParser/X86/x86_64-new-encoder.s @@ -25,5 +25,30 @@ movq $12, foo(%rip) // CHECK: encoding: [0x48,0xc7,0x05,A,A,A,A,0x0c,0x00,0x00,0x00] // CHECK: fixup A - offset: 3, value: foo-8, kind: reloc_riprel_4byte -// CHECK: addq $-424, %rax # encoding: [0x48,0x05,0x58,0xfe,0xff,0xff] +// CHECK: addq $-424, %rax +// CHECK: encoding: [0x48,0x05,0x58,0xfe,0xff,0xff] addq $-424, %rax + + +// CHECK: movq _foo@GOTPCREL(%rip), %rax +// CHECK: encoding: [0x48,0x8b,0x05,A,A,A,A] +// CHECK: fixup A - offset: 3, value: _foo@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load +movq _foo@GOTPCREL(%rip), %rax + +// CHECK: movq _foo@GOTPCREL(%rip), %r14 +// CHECK: encoding: [0x4c,0x8b,0x35,A,A,A,A] +// CHECK: fixup A - offset: 3, value: _foo@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load +movq _foo@GOTPCREL(%rip), %r14 + + +// CHECK: movq (%r13,%rax,8), %r13 +// CHECK: encoding: [0x4d,0x8b,0x6c,0xc5,0x00] +movq 0x00(%r13,%rax,8),%r13 + +// CHECK: testq %rax, %rbx +// CHECK: encoding: [0x48,0x85,0xd8] +testq %rax, %rbx + +// CHECK: cmpq %rbx, %r14 +// CHECK: encoding: [0x49,0x39,0xde] + cmpq %rbx, %r14 diff --git a/test/MC/MachO/Darwin/dg.exp b/test/MC/MachO/Darwin/dg.exp deleted file mode 100644 index 0f34b631570a..000000000000 --- a/test/MC/MachO/Darwin/dg.exp +++ /dev/null @@ -1,5 +0,0 @@ -load_lib llvm.exp - -if { [llvm_supports_darwin_and_target X86] } { - RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]] -} diff --git a/test/MC/MachO/Darwin/x86_32_diff_as.s b/test/MC/MachO/Darwin/x86_32_diff_as.s deleted file mode 100644 index 7fe75aae3880..000000000000 --- a/test/MC/MachO/Darwin/x86_32_diff_as.s +++ /dev/null @@ -1,551 +0,0 @@ -// Validate that we can assemble this file exactly like the platform -// assembler. 
-// -// XFAIL: * -// RUN: llvm-mc -filetype=obj -triple i386-unknown-unknown -o %t.mc.o %s -// RUN: as -arch i386 -o %t.as.o %s -// RUN: diff %t.mc.o %t.as.o - - movb $0x7f,0xdeadbeef(%ebx,%ecx,8) - movw $0x7ace,0xdeadbeef(%ebx,%ecx,8) - movl $0x7afebabe,0xdeadbeef(%ebx,%ecx,8) - movl $0x13572468,0xdeadbeef(%ebx,%ecx,8) - movsbl 0xdeadbeef(%ebx,%ecx,8),%ecx - movswl 0xdeadbeef(%ebx,%ecx,8),%ecx - movzbl 0xdeadbeef(%ebx,%ecx,8),%ecx - movzwl 0xdeadbeef(%ebx,%ecx,8),%ecx - pushl 0xdeadbeef(%ebx,%ecx,8) - popl 0xdeadbeef(%ebx,%ecx,8) - lahf - sahf - addb $0xfe,0xdeadbeef(%ebx,%ecx,8) - addb $0x7f,0xdeadbeef(%ebx,%ecx,8) - addw $0x7ace,0xdeadbeef(%ebx,%ecx,8) - addl $0x7afebabe,0xdeadbeef(%ebx,%ecx,8) - addl $0x13572468,0xdeadbeef(%ebx,%ecx,8) - incl 0xdeadbeef(%ebx,%ecx,8) - subb $0xfe,0xdeadbeef(%ebx,%ecx,8) - subb $0x7f,0xdeadbeef(%ebx,%ecx,8) - subw $0x7ace,0xdeadbeef(%ebx,%ecx,8) - subl $0x7afebabe,0xdeadbeef(%ebx,%ecx,8) - subl $0x13572468,0xdeadbeef(%ebx,%ecx,8) - decl 0xdeadbeef(%ebx,%ecx,8) - sbbw $0x7ace,0xdeadbeef(%ebx,%ecx,8) - sbbl $0x7afebabe,0xdeadbeef(%ebx,%ecx,8) - sbbl $0x13572468,0xdeadbeef(%ebx,%ecx,8) - cmpb $0xfe,0xdeadbeef(%ebx,%ecx,8) - cmpb $0x7f,0xdeadbeef(%ebx,%ecx,8) - cmpw $0x7ace,0xdeadbeef(%ebx,%ecx,8) - cmpl $0x7afebabe,0xdeadbeef(%ebx,%ecx,8) - cmpl $0x13572468,0xdeadbeef(%ebx,%ecx,8) - testb $0x7f,0xdeadbeef(%ebx,%ecx,8) - testw $0x7ace,0xdeadbeef(%ebx,%ecx,8) - testl $0x7afebabe,0xdeadbeef(%ebx,%ecx,8) - testl $0x13572468,0xdeadbeef(%ebx,%ecx,8) - andb $0xfe,0xdeadbeef(%ebx,%ecx,8) - andb $0x7f,0xdeadbeef(%ebx,%ecx,8) - andw $0x7ace,0xdeadbeef(%ebx,%ecx,8) - andl $0x7afebabe,0xdeadbeef(%ebx,%ecx,8) - andl $0x13572468,0xdeadbeef(%ebx,%ecx,8) - orb $0xfe,0xdeadbeef(%ebx,%ecx,8) - orb $0x7f,0xdeadbeef(%ebx,%ecx,8) - orw $0x7ace,0xdeadbeef(%ebx,%ecx,8) - orl $0x7afebabe,0xdeadbeef(%ebx,%ecx,8) - orl $0x13572468,0xdeadbeef(%ebx,%ecx,8) - xorb $0xfe,0xdeadbeef(%ebx,%ecx,8) - xorb $0x7f,0xdeadbeef(%ebx,%ecx,8) - xorw $0x7ace,0xdeadbeef(%ebx,%ecx,8) - xorl $0x7afebabe,0xdeadbeef(%ebx,%ecx,8) - xorl $0x13572468,0xdeadbeef(%ebx,%ecx,8) - adcb $0xfe,0xdeadbeef(%ebx,%ecx,8) - adcb $0x7f,0xdeadbeef(%ebx,%ecx,8) - adcw $0x7ace,0xdeadbeef(%ebx,%ecx,8) - adcl $0x7afebabe,0xdeadbeef(%ebx,%ecx,8) - adcl $0x13572468,0xdeadbeef(%ebx,%ecx,8) - negl 0xdeadbeef(%ebx,%ecx,8) - notl 0xdeadbeef(%ebx,%ecx,8) - cbtw - cwtl - cwtd - cltd - mull 0xdeadbeef(%ebx,%ecx,8) - imull 0xdeadbeef(%ebx,%ecx,8) - divl 0xdeadbeef(%ebx,%ecx,8) - idivl 0xdeadbeef(%ebx,%ecx,8) - roll $0,0xdeadbeef(%ebx,%ecx,8) - rolb $0x7f,0xdeadbeef(%ebx,%ecx,8) - roll 0xdeadbeef(%ebx,%ecx,8) - rorl $0,0xdeadbeef(%ebx,%ecx,8) - rorb $0x7f,0xdeadbeef(%ebx,%ecx,8) - rorl 0xdeadbeef(%ebx,%ecx,8) - shll $0,0xdeadbeef(%ebx,%ecx,8) - shlb $0x7f,0xdeadbeef(%ebx,%ecx,8) - shll 0xdeadbeef(%ebx,%ecx,8) - shrl $0,0xdeadbeef(%ebx,%ecx,8) - shrb $0x7f,0xdeadbeef(%ebx,%ecx,8) - shrl 0xdeadbeef(%ebx,%ecx,8) - sarl $0,0xdeadbeef(%ebx,%ecx,8) - sarb $0x7f,0xdeadbeef(%ebx,%ecx,8) - sarl 0xdeadbeef(%ebx,%ecx,8) - call *%ecx - call *0xdeadbeef(%ebx,%ecx,8) - call *0xdeadbeef(%ebx,%ecx,8) - jmp *0xdeadbeef(%ebx,%ecx,8) - jmp *0xdeadbeef(%ebx,%ecx,8) - ljmpl *0xdeadbeef(%ebx,%ecx,8) - lret - leave - seto %bl - seto 0xdeadbeef(%ebx,%ecx,8) - setno %bl - setno 0xdeadbeef(%ebx,%ecx,8) - setb %bl - setb 0xdeadbeef(%ebx,%ecx,8) - setae %bl - setae 0xdeadbeef(%ebx,%ecx,8) - sete %bl - sete 0xdeadbeef(%ebx,%ecx,8) - setne %bl - setne 0xdeadbeef(%ebx,%ecx,8) - setbe %bl - setbe 0xdeadbeef(%ebx,%ecx,8) - seta %bl - seta 0xdeadbeef(%ebx,%ecx,8) - 
sets %bl - sets 0xdeadbeef(%ebx,%ecx,8) - setns %bl - setns 0xdeadbeef(%ebx,%ecx,8) - setp %bl - setp 0xdeadbeef(%ebx,%ecx,8) - setnp %bl - setnp 0xdeadbeef(%ebx,%ecx,8) - setl %bl - setl 0xdeadbeef(%ebx,%ecx,8) - setge %bl - setge 0xdeadbeef(%ebx,%ecx,8) - setle %bl - setle 0xdeadbeef(%ebx,%ecx,8) - setg %bl - setg 0xdeadbeef(%ebx,%ecx,8) - nopl 0xdeadbeef(%ebx,%ecx,8) - nop - fldl 0xdeadbeef(%ebx,%ecx,8) - fildl 0xdeadbeef(%ebx,%ecx,8) - fildll 0xdeadbeef(%ebx,%ecx,8) - fldt 0xdeadbeef(%ebx,%ecx,8) - fbld 0xdeadbeef(%ebx,%ecx,8) - fstl 0xdeadbeef(%ebx,%ecx,8) - fistl 0xdeadbeef(%ebx,%ecx,8) - fstpl 0xdeadbeef(%ebx,%ecx,8) - fistpl 0xdeadbeef(%ebx,%ecx,8) - fistpll 0xdeadbeef(%ebx,%ecx,8) - fstpt 0xdeadbeef(%ebx,%ecx,8) - fbstp 0xdeadbeef(%ebx,%ecx,8) - ficoml 0xdeadbeef(%ebx,%ecx,8) - ficompl 0xdeadbeef(%ebx,%ecx,8) - fucompp - ftst - fld1 - fldz - faddl 0xdeadbeef(%ebx,%ecx,8) - fiaddl 0xdeadbeef(%ebx,%ecx,8) - fsubl 0xdeadbeef(%ebx,%ecx,8) - fisubl 0xdeadbeef(%ebx,%ecx,8) - fsubrl 0xdeadbeef(%ebx,%ecx,8) - fisubrl 0xdeadbeef(%ebx,%ecx,8) - fmull 0xdeadbeef(%ebx,%ecx,8) - fimull 0xdeadbeef(%ebx,%ecx,8) - fdivl 0xdeadbeef(%ebx,%ecx,8) - fidivl 0xdeadbeef(%ebx,%ecx,8) - fdivrl 0xdeadbeef(%ebx,%ecx,8) - fidivrl 0xdeadbeef(%ebx,%ecx,8) - fsqrt - fsin - fcos - fchs - fabs - fldcw 0xdeadbeef(%ebx,%ecx,8) - fnstcw 0xdeadbeef(%ebx,%ecx,8) - rdtsc - sysenter - sysexit - ud2 - movnti %ecx,0xdeadbeef(%ebx,%ecx,8) - clflush 0xdeadbeef(%ebx,%ecx,8) - emms - movd %ecx,%mm3 - movd 0xdeadbeef(%ebx,%ecx,8),%mm3 - movd %ecx,%xmm5 - movd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - movd %xmm5,%ecx - movd %xmm5,0xdeadbeef(%ebx,%ecx,8) - movq 0xdeadbeef(%ebx,%ecx,8),%mm3 - movq %mm3,%mm3 - movq %mm3,%mm3 - movq %xmm5,%xmm5 - movq %xmm5,%xmm5 - packssdw %mm3,%mm3 - packssdw %xmm5,%xmm5 - packsswb %mm3,%mm3 - packsswb %xmm5,%xmm5 - packuswb %mm3,%mm3 - packuswb %xmm5,%xmm5 - paddb %mm3,%mm3 - paddb %xmm5,%xmm5 - paddw %mm3,%mm3 - paddw %xmm5,%xmm5 - paddd %mm3,%mm3 - paddd %xmm5,%xmm5 - paddq %mm3,%mm3 - paddq %xmm5,%xmm5 - paddsb %mm3,%mm3 - paddsb %xmm5,%xmm5 - paddsw %mm3,%mm3 - paddsw %xmm5,%xmm5 - paddusb %mm3,%mm3 - paddusb %xmm5,%xmm5 - paddusw %mm3,%mm3 - paddusw %xmm5,%xmm5 - pand %mm3,%mm3 - pand %xmm5,%xmm5 - pandn %mm3,%mm3 - pandn %xmm5,%xmm5 - pcmpeqb %mm3,%mm3 - pcmpeqb %xmm5,%xmm5 - pcmpeqw %mm3,%mm3 - pcmpeqw %xmm5,%xmm5 - pcmpeqd %mm3,%mm3 - pcmpeqd %xmm5,%xmm5 - pcmpgtb %mm3,%mm3 - pcmpgtb %xmm5,%xmm5 - pcmpgtw %mm3,%mm3 - pcmpgtw %xmm5,%xmm5 - pcmpgtd %mm3,%mm3 - pcmpgtd %xmm5,%xmm5 - pmaddwd %mm3,%mm3 - pmaddwd %xmm5,%xmm5 - pmulhw %mm3,%mm3 - pmulhw %xmm5,%xmm5 - pmullw %mm3,%mm3 - pmullw %xmm5,%xmm5 - por %mm3,%mm3 - por %xmm5,%xmm5 - psllw %mm3,%mm3 - psllw %xmm5,%xmm5 - psllw $0x7f,%mm3 - psllw $0x7f,%xmm5 - pslld %mm3,%mm3 - pslld %xmm5,%xmm5 - pslld $0x7f,%mm3 - pslld $0x7f,%xmm5 - psllq %mm3,%mm3 - psllq %xmm5,%xmm5 - psllq $0x7f,%mm3 - psllq $0x7f,%xmm5 - psraw %mm3,%mm3 - psraw %xmm5,%xmm5 - psraw $0x7f,%mm3 - psraw $0x7f,%xmm5 - psrad %mm3,%mm3 - psrad %xmm5,%xmm5 - psrad $0x7f,%mm3 - psrad $0x7f,%xmm5 - psrlw %mm3,%mm3 - psrlw %xmm5,%xmm5 - psrlw $0x7f,%mm3 - psrlw $0x7f,%xmm5 - psrld %mm3,%mm3 - psrld %xmm5,%xmm5 - psrld $0x7f,%mm3 - psrld $0x7f,%xmm5 - psrlq %mm3,%mm3 - psrlq %xmm5,%xmm5 - psrlq $0x7f,%mm3 - psrlq $0x7f,%xmm5 - psubb %mm3,%mm3 - psubb %xmm5,%xmm5 - psubw %mm3,%mm3 - psubw %xmm5,%xmm5 - psubd %mm3,%mm3 - psubd %xmm5,%xmm5 - psubq %mm3,%mm3 - psubq %xmm5,%xmm5 - psubsb %mm3,%mm3 - psubsb %xmm5,%xmm5 - psubsw %mm3,%mm3 - psubsw %xmm5,%xmm5 - psubusb %mm3,%mm3 - psubusb 
%xmm5,%xmm5 - psubusw %mm3,%mm3 - psubusw %xmm5,%xmm5 - punpckhbw %mm3,%mm3 - punpckhbw %xmm5,%xmm5 - punpckhwd %mm3,%mm3 - punpckhwd %xmm5,%xmm5 - punpckhdq %mm3,%mm3 - punpckhdq %xmm5,%xmm5 - punpcklbw %mm3,%mm3 - punpcklbw %xmm5,%xmm5 - punpcklwd %mm3,%mm3 - punpcklwd %xmm5,%xmm5 - punpckldq %mm3,%mm3 - punpckldq %xmm5,%xmm5 - pxor %mm3,%mm3 - pxor %xmm5,%xmm5 - addps %xmm5,%xmm5 - addss %xmm5,%xmm5 - andnps %xmm5,%xmm5 - andps %xmm5,%xmm5 - cvtpi2ps 0xdeadbeef(%ebx,%ecx,8),%xmm5 - cvtpi2ps %mm3,%xmm5 - cvtps2pi 0xdeadbeef(%ebx,%ecx,8),%mm3 - cvtps2pi %xmm5,%mm3 - cvtsi2ss %ecx,%xmm5 - cvtsi2ss 0xdeadbeef(%ebx,%ecx,8),%xmm5 - cvttps2pi 0xdeadbeef(%ebx,%ecx,8),%mm3 - cvttps2pi %xmm5,%mm3 - cvttss2si 0xdeadbeef(%ebx,%ecx,8),%ecx - cvttss2si %xmm5,%ecx - divps %xmm5,%xmm5 - divss %xmm5,%xmm5 - ldmxcsr 0xdeadbeef(%ebx,%ecx,8) - maskmovq %mm3,%mm3 - maxps %xmm5,%xmm5 - maxss %xmm5,%xmm5 - minps %xmm5,%xmm5 - minss %xmm5,%xmm5 - movaps 0xdeadbeef(%ebx,%ecx,8),%xmm5 - movaps %xmm5,%xmm5 - movaps %xmm5,0xdeadbeef(%ebx,%ecx,8) - movaps %xmm5,%xmm5 - movhlps %xmm5,%xmm5 - movhps %xmm5,0xdeadbeef(%ebx,%ecx,8) - movlhps %xmm5,%xmm5 - movlps %xmm5,0xdeadbeef(%ebx,%ecx,8) - movmskps %xmm5,%ecx - movntps %xmm5,0xdeadbeef(%ebx,%ecx,8) - movntq %mm3,0xdeadbeef(%ebx,%ecx,8) - movntdq %xmm5,0xdeadbeef(%ebx,%ecx,8) - movss 0xdeadbeef(%ebx,%ecx,8),%xmm5 - movss %xmm5,%xmm5 - movss %xmm5,0xdeadbeef(%ebx,%ecx,8) - movss %xmm5,%xmm5 - movups 0xdeadbeef(%ebx,%ecx,8),%xmm5 - movups %xmm5,%xmm5 - movups %xmm5,0xdeadbeef(%ebx,%ecx,8) - movups %xmm5,%xmm5 - mulps %xmm5,%xmm5 - mulss %xmm5,%xmm5 - orps %xmm5,%xmm5 - pavgb %mm3,%mm3 - pavgb %xmm5,%xmm5 - pavgw %mm3,%mm3 - pavgw %xmm5,%xmm5 - pmaxsw %mm3,%mm3 - pmaxsw %xmm5,%xmm5 - pmaxub %mm3,%mm3 - pmaxub %xmm5,%xmm5 - pminsw %mm3,%mm3 - pminsw %xmm5,%xmm5 - pminub %mm3,%mm3 - pminub %xmm5,%xmm5 - pmovmskb %mm3,%ecx - pmovmskb %xmm5,%ecx - pmulhuw %mm3,%mm3 - pmulhuw %xmm5,%xmm5 - prefetchnta 0xdeadbeef(%ebx,%ecx,8) - prefetcht0 0xdeadbeef(%ebx,%ecx,8) - prefetcht1 0xdeadbeef(%ebx,%ecx,8) - prefetcht2 0xdeadbeef(%ebx,%ecx,8) - psadbw %mm3,%mm3 - psadbw %xmm5,%xmm5 - rcpps 0xdeadbeef(%ebx,%ecx,8),%xmm5 - rcpps %xmm5,%xmm5 - rcpss 0xdeadbeef(%ebx,%ecx,8),%xmm5 - rcpss %xmm5,%xmm5 - rsqrtps 0xdeadbeef(%ebx,%ecx,8),%xmm5 - rsqrtps %xmm5,%xmm5 - rsqrtss 0xdeadbeef(%ebx,%ecx,8),%xmm5 - rsqrtss %xmm5,%xmm5 - sqrtps 0xdeadbeef(%ebx,%ecx,8),%xmm5 - sqrtps %xmm5,%xmm5 - sqrtss 0xdeadbeef(%ebx,%ecx,8),%xmm5 - sqrtss %xmm5,%xmm5 - stmxcsr 0xdeadbeef(%ebx,%ecx,8) - subps %xmm5,%xmm5 - subss %xmm5,%xmm5 - ucomiss 0xdeadbeef(%ebx,%ecx,8),%xmm5 - ucomiss %xmm5,%xmm5 - unpckhps %xmm5,%xmm5 - unpcklps %xmm5,%xmm5 - xorps %xmm5,%xmm5 - addpd %xmm5,%xmm5 - addsd %xmm5,%xmm5 - andnpd %xmm5,%xmm5 - andpd %xmm5,%xmm5 - comisd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - comisd %xmm5,%xmm5 - cvtpi2pd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - cvtpi2pd %mm3,%xmm5 - cvtsi2sd %ecx,%xmm5 - cvtsi2sd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - divpd %xmm5,%xmm5 - divsd %xmm5,%xmm5 - maxpd %xmm5,%xmm5 - maxsd %xmm5,%xmm5 - minpd %xmm5,%xmm5 - minsd %xmm5,%xmm5 - movapd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - movapd %xmm5,%xmm5 - movapd %xmm5,0xdeadbeef(%ebx,%ecx,8) - movapd %xmm5,%xmm5 - movhpd %xmm5,0xdeadbeef(%ebx,%ecx,8) - movlpd %xmm5,0xdeadbeef(%ebx,%ecx,8) - movmskpd %xmm5,%ecx - movntpd %xmm5,0xdeadbeef(%ebx,%ecx,8) - movsd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - movsd %xmm5,%xmm5 - movsd %xmm5,0xdeadbeef(%ebx,%ecx,8) - movsd %xmm5,%xmm5 - movupd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - movupd %xmm5,%xmm5 - movupd %xmm5,0xdeadbeef(%ebx,%ecx,8) - 
movupd %xmm5,%xmm5 - mulpd %xmm5,%xmm5 - mulsd %xmm5,%xmm5 - orpd %xmm5,%xmm5 - sqrtpd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - sqrtpd %xmm5,%xmm5 - sqrtsd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - sqrtsd %xmm5,%xmm5 - subpd %xmm5,%xmm5 - subsd %xmm5,%xmm5 - ucomisd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - ucomisd %xmm5,%xmm5 - unpckhpd %xmm5,%xmm5 - unpcklpd %xmm5,%xmm5 - xorpd %xmm5,%xmm5 - cvtdq2pd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - cvtdq2pd %xmm5,%xmm5 - cvtpd2dq 0xdeadbeef(%ebx,%ecx,8),%xmm5 - cvtpd2dq %xmm5,%xmm5 - cvtdq2ps 0xdeadbeef(%ebx,%ecx,8),%xmm5 - cvtdq2ps %xmm5,%xmm5 - cvtpd2pi 0xdeadbeef(%ebx,%ecx,8),%mm3 - cvtpd2pi %xmm5,%mm3 - cvtps2dq 0xdeadbeef(%ebx,%ecx,8),%xmm5 - cvtps2dq %xmm5,%xmm5 - cvtsd2ss 0xdeadbeef(%ebx,%ecx,8),%xmm5 - cvtsd2ss %xmm5,%xmm5 - cvtss2sd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - cvtss2sd %xmm5,%xmm5 - cvttpd2pi 0xdeadbeef(%ebx,%ecx,8),%mm3 - cvttpd2pi %xmm5,%mm3 - cvttsd2si 0xdeadbeef(%ebx,%ecx,8),%ecx - cvttsd2si %xmm5,%ecx - maskmovdqu %xmm5,%xmm5 - movdqa 0xdeadbeef(%ebx,%ecx,8),%xmm5 - movdqa %xmm5,%xmm5 - movdqa %xmm5,0xdeadbeef(%ebx,%ecx,8) - movdqa %xmm5,%xmm5 - movdqu 0xdeadbeef(%ebx,%ecx,8),%xmm5 - movdqu %xmm5,0xdeadbeef(%ebx,%ecx,8) - movdq2q %xmm5,%mm3 - movq2dq %mm3,%xmm5 - pmuludq %mm3,%mm3 - pmuludq %xmm5,%xmm5 - pslldq $0x7f,%xmm5 - psrldq $0x7f,%xmm5 - punpckhqdq %xmm5,%xmm5 - punpcklqdq %xmm5,%xmm5 - addsubpd %xmm5,%xmm5 - addsubps %xmm5,%xmm5 - haddpd %xmm5,%xmm5 - haddps %xmm5,%xmm5 - hsubpd %xmm5,%xmm5 - hsubps %xmm5,%xmm5 - lddqu 0xdeadbeef(%ebx,%ecx,8),%xmm5 - movddup 0xdeadbeef(%ebx,%ecx,8),%xmm5 - movddup %xmm5,%xmm5 - movshdup 0xdeadbeef(%ebx,%ecx,8),%xmm5 - movshdup %xmm5,%xmm5 - movsldup 0xdeadbeef(%ebx,%ecx,8),%xmm5 - movsldup %xmm5,%xmm5 - phaddw %mm3,%mm3 - phaddw %xmm5,%xmm5 - phaddd %mm3,%mm3 - phaddd %xmm5,%xmm5 - phaddsw %mm3,%mm3 - phaddsw %xmm5,%xmm5 - phsubw %mm3,%mm3 - phsubw %xmm5,%xmm5 - phsubd %mm3,%mm3 - phsubd %xmm5,%xmm5 - phsubsw %mm3,%mm3 - phsubsw %xmm5,%xmm5 - pmaddubsw %mm3,%mm3 - pmaddubsw %xmm5,%xmm5 - pmulhrsw %mm3,%mm3 - pmulhrsw %xmm5,%xmm5 - pshufb %mm3,%mm3 - pshufb %xmm5,%xmm5 - psignb %mm3,%mm3 - psignb %xmm5,%xmm5 - psignw %mm3,%mm3 - psignw %xmm5,%xmm5 - psignd %mm3,%mm3 - psignd %xmm5,%xmm5 - pabsb 0xdeadbeef(%ebx,%ecx,8),%mm3 - pabsb %mm3,%mm3 - pabsb 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pabsb %xmm5,%xmm5 - pabsw 0xdeadbeef(%ebx,%ecx,8),%mm3 - pabsw %mm3,%mm3 - pabsw 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pabsw %xmm5,%xmm5 - pabsd 0xdeadbeef(%ebx,%ecx,8),%mm3 - pabsd %mm3,%mm3 - pabsd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pabsd %xmm5,%xmm5 - femms - packusdw %xmm5,%xmm5 - pcmpeqq %xmm5,%xmm5 - phminposuw 0xdeadbeef(%ebx,%ecx,8),%xmm5 - phminposuw %xmm5,%xmm5 - pmaxsb %xmm5,%xmm5 - pmaxsd %xmm5,%xmm5 - pmaxud %xmm5,%xmm5 - pmaxuw %xmm5,%xmm5 - pminsb %xmm5,%xmm5 - pminsd %xmm5,%xmm5 - pminud %xmm5,%xmm5 - pminuw %xmm5,%xmm5 - pmovsxbw 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pmovsxbw %xmm5,%xmm5 - pmovsxbd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pmovsxbd %xmm5,%xmm5 - pmovsxbq 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pmovsxbq %xmm5,%xmm5 - pmovsxwd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pmovsxwd %xmm5,%xmm5 - pmovsxwq 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pmovsxwq %xmm5,%xmm5 - pmovsxdq 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pmovsxdq %xmm5,%xmm5 - pmovzxbw 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pmovzxbw %xmm5,%xmm5 - pmovzxbd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pmovzxbd %xmm5,%xmm5 - pmovzxbq 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pmovzxbq %xmm5,%xmm5 - pmovzxwd 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pmovzxwd %xmm5,%xmm5 - pmovzxwq 0xdeadbeef(%ebx,%ecx,8),%xmm5 - pmovzxwq %xmm5,%xmm5 - pmovzxdq 
0xdeadbeef(%ebx,%ecx,8),%xmm5 - pmovzxdq %xmm5,%xmm5 - pmuldq %xmm5,%xmm5 - pmulld %xmm5,%xmm5 - ptest 0xdeadbeef(%ebx,%ecx,8),%xmm5 - ptest %xmm5,%xmm5 - pcmpgtq %xmm5,%xmm5 diff --git a/test/MC/MachO/darwin-x86_64-diff-relocs.s b/test/MC/MachO/darwin-x86_64-diff-relocs.s new file mode 100644 index 000000000000..38fa074fde22 --- /dev/null +++ b/test/MC/MachO/darwin-x86_64-diff-relocs.s @@ -0,0 +1,329 @@ +// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s + + .text + +// FIXME: llvm-mc doesn't handle this in a way we can make compatible with 'as', +// currently, because of how we handle assembler variables. +// +// See improve handling of absolute symbols + +// _baz = 4 + +_foo: + xorl %eax,%eax +_g0: + xorl %eax,%eax +L0: + jmp 4 +// jmp _baz + +// FIXME: Darwin 'as' for historical reasons widens this jump, but doesn't emit +// a relocation. It seems like 'as' widens any jump that is not to a temporary, +// which is inherited from the x86_32 behavior, even though x86_64 could do +// better. +// jmp _g0 + + jmp L0 + jmp _g1 + +// FIXME: Darwin 'as' gets this wrong as well, even though it could get it right +// given the other things we do on x86_64. It is using a short jump here. This +// is probably fallout of the hack that exists for x86_32. +// jmp L1 + +// FIXME: We don't support this, and would currently get it wrong, it should be a jump to an absolute address. +// jmp L0 - _g0 + +// jmp _g1 - _g0 +// FIXME: Darwin 'as' comes up with 'SIGNED' here instead of 'BRANCH'. +// jmp _g1 - L1 +// FIXME: Darwin 'as' gets this completely wrong. It ends up with a single +// branch relocation. Fallout from the other delta hack? +// jmp L1 - _g0 + + jmp _g2 + jmp L2 + jmp _g3 + jmp L3 +// FIXME: Darwin 'as' gets this completely wrong. It ends up with a single +// branch relocation. Fallout from the other delta hack? +// jmp L2 - _g3 +// jmp _g3 - _g2 +// FIXME: Darwin 'as' comes up with 'SIGNED' here instead of 'BRANCH'. +// jmp _g3 - L3 +// FIXME: Darwin 'as' gets this completely wrong. It ends up with a single +// branch relocation. Fallout from the other delta hack? +// jmp L3 - _g2 + + movl %eax,4(%rip) +// movl %eax,_baz(%rip) + movl %eax,_g0(%rip) + movl %eax,L0(%rip) + movl %eax,_g1(%rip) + movl %eax,L1(%rip) + +// FIXME: Darwin 'as' gets most of these wrong, and there is an ambiguity in ATT +// syntax in what they should mean in the first place (absolute or +// rip-relative address). +// movl %eax,L0 - _g0(%rip) +// movl %eax,_g1 - _g0(%rip) +// movl %eax,_g1 - L1(%rip) +// movl %eax,L1 - _g0(%rip) + + movl %eax,_g2(%rip) + movl %eax,L2(%rip) + movl %eax,_g3(%rip) + movl %eax,L3(%rip) + +// FIXME: Darwin 'as' gets most of these wrong, and there is an ambiguity in ATT +// syntax in what they should mean in the first place (absolute or +// rip-relative address). 
+// movl %eax,L2 - _g2(%rip) +// movl %eax,_g3 - _g2(%rip) +// movl %eax,_g3 - L3(%rip) +// movl %eax,L3 - _g2(%rip) + +_g1: + xorl %eax,%eax +L1: + xorl %eax,%eax + + .data +_g2: + xorl %eax,%eax +L2: + .quad 4 +// .quad _baz + .quad _g2 + .quad L2 + .quad _g3 + .quad L3 + .quad L2 - _g2 + .quad _g3 - _g2 + .quad L3 - _g2 + .quad L3 - _g3 + + .quad _g0 + .quad L0 + .quad _g1 + .quad L1 + .quad L0 - _g0 + .quad _g1 - _g0 + .quad L1 - _g0 + .quad L1 - _g1 + +_g3: + xorl %eax,%eax +L3: + xorl %eax,%eax + +// CHECK: ('cputype', 16777223) +// CHECK: ('cpusubtype', 3) +// CHECK: ('filetype', 1) +// CHECK: ('num_load_commands', 1) +// CHECK: ('load_commands_size', 336) +// CHECK: ('flag', 0) +// CHECK: ('reserved', 0) +// CHECK: ('load_commands', [ +// CHECK: # Load Command 0 +// CHECK: (('command', 25) +// CHECK: ('size', 232) +// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('vm_addr', 0) +// CHECK: ('vm_size', 236) +// CHECK: ('file_offset', 368) +// CHECK: ('file_size', 236) +// CHECK: ('maxprot', 7) +// CHECK: ('initprot', 7) +// CHECK: ('num_sections', 2) +// CHECK: ('flags', 0) +// CHECK: ('sections', [ +// CHECK: # Section 0 +// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('address', 0) +// CHECK: ('size', 94) +// CHECK: ('offset', 368) +// CHECK: ('alignment', 0) +// CHECK: ('reloc_offset', 604) +// CHECK: ('num_reloc', 12) +// CHECK: ('flags', 0x80000400) +// CHECK: ('reserved1', 0) +// CHECK: ('reserved2', 0) +// CHECK: ('reserved3', 0) +// CHECK: ), +// CHECK: ('_relocations', [ + +// FIXME: Unfortunately, we do not get these relocations in exactly the same +// order as Darwin 'as'. It turns out that 'as' *usually* ends up emitting +// them in reverse address order, but sometimes it allocates some +// additional relocations late so these end up preceed the other entries. I +// haven't figured out the exact criteria for this yet. 
+ +// CHECK: (('word-0', 0x56), +// CHECK: ('word-1', 0x1d000004)), +// CHECK: (('word-0', 0x50), +// CHECK: ('word-1', 0x1d000004)), +// CHECK: (('word-0', 0x4a), +// CHECK: ('word-1', 0x1d000003)), +// CHECK: (('word-0', 0x44), +// CHECK: ('word-1', 0x1d000003)), +// CHECK: (('word-0', 0x3e), +// CHECK: ('word-1', 0x1d000002)), +// CHECK: (('word-0', 0x38), +// CHECK: ('word-1', 0x1d000002)), +// CHECK: (('word-0', 0x20), +// CHECK: ('word-1', 0x2d000004)), +// CHECK: (('word-0', 0x1b), +// CHECK: ('word-1', 0x2d000004)), +// CHECK: (('word-0', 0x16), +// CHECK: ('word-1', 0x2d000003)), +// CHECK: (('word-0', 0x11), +// CHECK: ('word-1', 0x2d000003)), +// CHECK: (('word-0', 0xc), +// CHECK: ('word-1', 0x2d000002)), +// CHECK: (('word-0', 0x5), +// CHECK: ('word-1', 0x2d000000)), +// CHECK: ]) +// CHECK: # Section 1 +// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('address', 94) +// CHECK: ('size', 142) +// CHECK: ('offset', 462) +// CHECK: ('alignment', 0) +// CHECK: ('reloc_offset', 700) +// CHECK: ('num_reloc', 16) +// CHECK: ('flags', 0x400) +// CHECK: ('reserved1', 0) +// CHECK: ('reserved2', 0) +// CHECK: ('reserved3', 0) +// CHECK: ), +// CHECK: ('_relocations', [ +// CHECK: # Relocation 0 +// CHECK: (('word-0', 0x7a), +// CHECK: ('word-1', 0x5e000001)), +// CHECK: # Relocation 1 +// CHECK: (('word-0', 0x7a), +// CHECK: ('word-1', 0xe000002)), +// CHECK: # Relocation 2 +// CHECK: (('word-0', 0x72), +// CHECK: ('word-1', 0x5e000001)), +// CHECK: # Relocation 3 +// CHECK: (('word-0', 0x72), +// CHECK: ('word-1', 0xe000002)), +// CHECK: # Relocation 4 +// CHECK: (('word-0', 0x62), +// CHECK: ('word-1', 0xe000002)), +// CHECK: # Relocation 5 +// CHECK: (('word-0', 0x5a), +// CHECK: ('word-1', 0xe000002)), +// CHECK: # Relocation 6 +// CHECK: (('word-0', 0x52), +// CHECK: ('word-1', 0xe000001)), +// CHECK: # Relocation 7 +// CHECK: (('word-0', 0x4a), +// CHECK: ('word-1', 0xe000001)), +// CHECK: # Relocation 8 +// CHECK: (('word-0', 0x3a), +// CHECK: ('word-1', 0x5e000003)), +// CHECK: # Relocation 9 +// CHECK: (('word-0', 0x3a), +// CHECK: ('word-1', 0xe000004)), +// CHECK: # Relocation 10 +// CHECK: (('word-0', 0x32), +// CHECK: ('word-1', 0x5e000003)), +// CHECK: # Relocation 11 +// CHECK: (('word-0', 0x32), +// CHECK: ('word-1', 0xe000004)), +// CHECK: # Relocation 12 +// CHECK: (('word-0', 0x22), +// CHECK: ('word-1', 0xe000004)), +// CHECK: # Relocation 13 +// CHECK: (('word-0', 0x1a), +// CHECK: ('word-1', 0xe000004)), +// CHECK: # Relocation 14 +// CHECK: (('word-0', 0x12), +// CHECK: ('word-1', 0xe000003)), +// CHECK: # Relocation 15 +// CHECK: (('word-0', 0xa), +// CHECK: ('word-1', 0xe000003)), +// CHECK: ]) +// CHECK: ]) +// CHECK: ), +// CHECK: # Load Command 1 +// CHECK: (('command', 2) +// CHECK: ('size', 24) +// CHECK: ('symoff', 828) +// CHECK: ('nsyms', 5) +// CHECK: ('stroff', 908) +// CHECK: ('strsize', 24) +// CHECK: ('_string_data', '\x00_foo\x00_g0\x00_g1\x00_g2\x00_g3\x00\x00\x00') +// CHECK: ('_symbols', [ +// CHECK: # Symbol 0 +// CHECK: (('n_strx', 1) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 0) +// CHECK: ('_string', '_foo') +// CHECK: ), +// CHECK: # Symbol 1 +// CHECK: (('n_strx', 6) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 2) +// CHECK: ('_string', '_g0') +// CHECK: ), +// CHECK: # Symbol 2 +// CHECK: (('n_strx', 10) +// 
CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 90) +// CHECK: ('_string', '_g1') +// CHECK: ), +// CHECK: # Symbol 3 +// CHECK: (('n_strx', 14) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 2) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 94) +// CHECK: ('_string', '_g2') +// CHECK: ), +// CHECK: # Symbol 4 +// CHECK: (('n_strx', 18) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 2) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 232) +// CHECK: ('_string', '_g3') +// CHECK: ), +// CHECK: ]) +// CHECK: ), +// CHECK: # Load Command 2 +// CHECK: (('command', 11) +// CHECK: ('size', 80) +// CHECK: ('ilocalsym', 0) +// CHECK: ('nlocalsym', 5) +// CHECK: ('iextdefsym', 5) +// CHECK: ('nextdefsym', 0) +// CHECK: ('iundefsym', 5) +// CHECK: ('nundefsym', 0) +// CHECK: ('tocoff', 0) +// CHECK: ('ntoc', 0) +// CHECK: ('modtaboff', 0) +// CHECK: ('nmodtab', 0) +// CHECK: ('extrefsymoff', 0) +// CHECK: ('nextrefsyms', 0) +// CHECK: ('indirectsymoff', 0) +// CHECK: ('nindirectsyms', 0) +// CHECK: ('extreloff', 0) +// CHECK: ('nextrel', 0) +// CHECK: ('locreloff', 0) +// CHECK: ('nlocrel', 0) +// CHECK: ('_indirect_symbols', [ +// CHECK: ]) +// CHECK: ), +// CHECK: ]) diff --git a/test/MC/MachO/darwin-x86_64-reloc-offsets.s b/test/MC/MachO/darwin-x86_64-reloc-offsets.s new file mode 100644 index 000000000000..ab6820e4a82a --- /dev/null +++ b/test/MC/MachO/darwin-x86_64-reloc-offsets.s @@ -0,0 +1,343 @@ +// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s + + .data + + .org 0x10 +L0: + .long 0 + .long 0 + .long 0 + .long 0 + +_d: + .long 0 +L1: + .long 0 + + .text + +// These generate normal x86_64 (external) relocations. They could all use +// SIGNED, but don't for pedantic compatibility with Darwin 'as'. + + // SIGNED1 + movb $0x12, _d(%rip) + + // SIGNED + movb $0x12, _d + 1(%rip) + + // SIGNED4 + movl $0x12345678, _d(%rip) + + // SIGNED + movl $0x12345678, _d + 1(%rip) + + // SIGNED2 + movl $0x12345678, _d + 2(%rip) + + // SIGNED1 + movl $0x12345678, _d + 3(%rip) + + // SIGNED + movl $0x12345678, _d + 4(%rip) + + movb %al, _d(%rip) + movb %al, _d + 1(%rip) + movl %eax, _d(%rip) + movl %eax, _d + 1(%rip) + movl %eax, _d + 2(%rip) + movl %eax, _d + 3(%rip) + movl %eax, _d + 4(%rip) + +// These have to use local relocations. Since that uses an offset into the +// section in x86_64 (as opposed to a scattered relocation), and since the +// linker can only decode this to an atom + offset by scanning the section, +// it is not possible to correctly encode these without SIGNED. This is +// ultimately due to a design flaw in the x86_64 relocation format, it is +// not possible to encode an address (L + ) which is outside the +// atom containing L. 
+ + // SIGNED1 + movb $0x12, L0(%rip) + + // SIGNED + movb $0x12, L0 + 1(%rip) + + // SIGNED4 + movl $0x12345678, L0(%rip) + + // SIGNED + movl $0x12345678, L0 + 1(%rip) + + // SIGNED2 + movl $0x12345678, L0 + 2(%rip) + + // SIGNED1 + movl $0x12345678, L0 + 3(%rip) + + // SIGNED + movl $0x12345678, L0 + 4(%rip) + + movb %al, L0(%rip) + movb %al, L0 + 1(%rip) + movl %eax, L0(%rip) + movl %eax, L0 + 1(%rip) + movl %eax, L0 + 2(%rip) + movl %eax, L0 + 3(%rip) + movl %eax, L0 + 4(%rip) + + // SIGNED1 + movb $0x12, L1(%rip) + + // SIGNED + movb $0x12, L1 + 1(%rip) + + // SIGNED4 + movl $0x12345678, L1(%rip) + + // SIGNED + movl $0x12345678, L1 + 1(%rip) + + // SIGNED2 + movl $0x12345678, L1 + 2(%rip) + + // SIGNED1 + movl $0x12345678, L1 + 3(%rip) + + // SIGNED + movl $0x12345678, L1 + 4(%rip) + + movb %al, L1(%rip) + movb %al, L1 + 1(%rip) + movl %eax, L1(%rip) + movl %eax, L1 + 1(%rip) + movl %eax, L1 + 2(%rip) + movl %eax, L1 + 3(%rip) + movl %eax, L1 + 4(%rip) + +// CHECK: ('cputype', 16777223) +// CHECK: ('cpusubtype', 3) +// CHECK: ('filetype', 1) +// CHECK: ('num_load_commands', 1) +// CHECK: ('load_commands_size', 336) +// CHECK: ('flag', 0) +// CHECK: ('reserved', 0) +// CHECK: ('load_commands', [ +// CHECK: # Load Command 0 +// CHECK: (('command', 25) +// CHECK: ('size', 232) +// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('vm_addr', 0) +// CHECK: ('vm_size', 358) +// CHECK: ('file_offset', 368) +// CHECK: ('file_size', 358) +// CHECK: ('maxprot', 7) +// CHECK: ('initprot', 7) +// CHECK: ('num_sections', 2) +// CHECK: ('flags', 0) +// CHECK: ('sections', [ +// CHECK: # Section 0 +// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('address', 0) +// CHECK: ('size', 318) +// CHECK: ('offset', 368) +// CHECK: ('alignment', 0) +// CHECK: ('reloc_offset', 728) +// CHECK: ('num_reloc', 42) +// CHECK: ('flags', 0x80000400) +// CHECK: ('reserved1', 0) +// CHECK: ('reserved2', 0) +// CHECK: ('reserved3', 0) +// CHECK: ), +// CHECK: ('_relocations', [ +// CHECK: # Relocation 0 +// CHECK: (('word-0', 0x13a), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 1 +// CHECK: (('word-0', 0x134), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 2 +// CHECK: (('word-0', 0x12e), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 3 +// CHECK: (('word-0', 0x128), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 4 +// CHECK: (('word-0', 0x122), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 5 +// CHECK: (('word-0', 0x11c), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 6 +// CHECK: (('word-0', 0x116), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 7 +// CHECK: (('word-0', 0x10c), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 8 +// CHECK: (('word-0', 0x102), +// CHECK: ('word-1', 0x6d000000)), +// CHECK: # Relocation 9 +// CHECK: (('word-0', 0xf8), +// CHECK: ('word-1', 0x7d000000)), +// CHECK: # Relocation 10 +// CHECK: (('word-0', 0xee), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 11 +// CHECK: (('word-0', 0xe4), +// CHECK: ('word-1', 0x8d000000)), +// CHECK: # Relocation 12 +// CHECK: (('word-0', 0xdd), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 13 +// CHECK: (('word-0', 0xd6), +// CHECK: ('word-1', 0x6d000000)), +// CHECK: # Relocation 14 +// CHECK: (('word-0', 0xd0), +// CHECK: ('word-1', 0x15000002)), +// 
CHECK: # Relocation 15 +// CHECK: (('word-0', 0xca), +// CHECK: ('word-1', 0x15000002)), +// CHECK: # Relocation 16 +// CHECK: (('word-0', 0xc4), +// CHECK: ('word-1', 0x15000002)), +// CHECK: # Relocation 17 +// CHECK: (('word-0', 0xbe), +// CHECK: ('word-1', 0x15000002)), +// CHECK: # Relocation 18 +// CHECK: (('word-0', 0xb8), +// CHECK: ('word-1', 0x15000002)), +// CHECK: # Relocation 19 +// CHECK: (('word-0', 0xb2), +// CHECK: ('word-1', 0x15000002)), +// CHECK: # Relocation 20 +// CHECK: (('word-0', 0xac), +// CHECK: ('word-1', 0x15000002)), +// CHECK: # Relocation 21 +// CHECK: (('word-0', 0xa2), +// CHECK: ('word-1', 0x15000002)), +// CHECK: # Relocation 22 +// CHECK: (('word-0', 0x98), +// CHECK: ('word-1', 0x65000002)), +// CHECK: # Relocation 23 +// CHECK: (('word-0', 0x8e), +// CHECK: ('word-1', 0x75000002)), +// CHECK: # Relocation 24 +// CHECK: (('word-0', 0x84), +// CHECK: ('word-1', 0x15000002)), +// CHECK: # Relocation 25 +// CHECK: (('word-0', 0x7a), +// CHECK: ('word-1', 0x85000002)), +// CHECK: # Relocation 26 +// CHECK: (('word-0', 0x73), +// CHECK: ('word-1', 0x15000002)), +// CHECK: # Relocation 27 +// CHECK: (('word-0', 0x6c), +// CHECK: ('word-1', 0x65000002)), +// CHECK: # Relocation 28 +// CHECK: (('word-0', 0x66), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 29 +// CHECK: (('word-0', 0x60), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 30 +// CHECK: (('word-0', 0x5a), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 31 +// CHECK: (('word-0', 0x54), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 32 +// CHECK: (('word-0', 0x4e), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 33 +// CHECK: (('word-0', 0x48), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 34 +// CHECK: (('word-0', 0x42), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 35 +// CHECK: (('word-0', 0x38), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 36 +// CHECK: (('word-0', 0x2e), +// CHECK: ('word-1', 0x6d000000)), +// CHECK: # Relocation 37 +// CHECK: (('word-0', 0x24), +// CHECK: ('word-1', 0x7d000000)), +// CHECK: # Relocation 38 +// CHECK: (('word-0', 0x1a), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 39 +// CHECK: (('word-0', 0x10), +// CHECK: ('word-1', 0x8d000000)), +// CHECK: # Relocation 40 +// CHECK: (('word-0', 0x9), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 41 +// CHECK: (('word-0', 0x2), +// CHECK: ('word-1', 0x6d000000)), +// CHECK: ]) +// CHECK: ('_section_data', 
'\xc6\x05\xff\xff\xff\xff\x12\xc6\x05\x00\x00\x00\x00\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\xc7\x05\xfd\xff\xff\xffxV4\x12\xc7\x05\xfe\xff\xff\xffxV4\x12\xc7\x05\xff\xff\xff\xffxV4\x12\xc7\x05\x00\x00\x00\x00xV4\x12\x88\x05\x00\x00\x00\x00\x88\x05\x01\x00\x00\x00\x89\x05\x00\x00\x00\x00\x89\x05\x01\x00\x00\x00\x89\x05\x02\x00\x00\x00\x89\x05\x03\x00\x00\x00\x89\x05\x04\x00\x00\x00\xc6\x05\xdd\x00\x00\x00\x12\xc6\x05\xd7\x00\x00\x00\x12\xc7\x05\xcc\x00\x00\x00xV4\x12\xc7\x05\xc3\x00\x00\x00xV4\x12\xc7\x05\xba\x00\x00\x00xV4\x12\xc7\x05\xb1\x00\x00\x00xV4\x12\xc7\x05\xa8\x00\x00\x00xV4\x12\x88\x05\x9e\x00\x00\x00\x88\x05\x99\x00\x00\x00\x89\x05\x92\x00\x00\x00\x89\x05\x8d\x00\x00\x00\x89\x05\x88\x00\x00\x00\x89\x05\x83\x00\x00\x00\x89\x05~\x00\x00\x00\xc6\x05\x03\x00\x00\x00\x12\xc6\x05\x04\x00\x00\x00\x12\xc7\x05\x00\x00\x00\x00xV4\x12\xc7\x05\x01\x00\x00\x00xV4\x12\xc7\x05\x02\x00\x00\x00xV4\x12\xc7\x05\x03\x00\x00\x00xV4\x12\xc7\x05\x04\x00\x00\x00xV4\x12\x88\x05\x04\x00\x00\x00\x88\x05\x05\x00\x00\x00\x89\x05\x04\x00\x00\x00\x89\x05\x05\x00\x00\x00\x89\x05\x06\x00\x00\x00\x89\x05\x07\x00\x00\x00\x89\x05\x08\x00\x00\x00') +// CHECK: # Section 1 +// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('address', 318) +// CHECK: ('size', 40) +// CHECK: ('offset', 686) +// CHECK: ('alignment', 0) +// CHECK: ('reloc_offset', 0) +// CHECK: ('num_reloc', 0) +// CHECK: ('flags', 0x0) +// CHECK: ('reserved1', 0) +// CHECK: ('reserved2', 0) +// CHECK: ('reserved3', 0) +// CHECK: ), +// CHECK: ('_relocations', [ +// CHECK: ]) +// CHECK: ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ]) +// CHECK: ), +// CHECK: # Load Command 1 +// CHECK: (('command', 2) +// CHECK: ('size', 24) +// CHECK: ('symoff', 1064) +// CHECK: ('nsyms', 1) +// CHECK: ('stroff', 1080) +// CHECK: ('strsize', 4) +// CHECK: ('_string_data', '\x00_d\x00') +// CHECK: ('_symbols', [ +// CHECK: # Symbol 0 +// CHECK: (('n_strx', 1) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 2) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 350) +// CHECK: ('_string', '_d') +// CHECK: ), +// CHECK: ]) +// CHECK: ), +// CHECK: # Load Command 2 +// CHECK: (('command', 11) +// CHECK: ('size', 80) +// CHECK: ('ilocalsym', 0) +// CHECK: ('nlocalsym', 1) +// CHECK: ('iextdefsym', 1) +// CHECK: ('nextdefsym', 0) +// CHECK: ('iundefsym', 1) +// CHECK: ('nundefsym', 0) +// CHECK: ('tocoff', 0) +// CHECK: ('ntoc', 0) +// CHECK: ('modtaboff', 0) +// CHECK: ('nmodtab', 0) +// CHECK: ('extrefsymoff', 0) +// CHECK: ('nextrefsyms', 0) +// CHECK: ('indirectsymoff', 0) +// CHECK: ('nindirectsyms', 0) +// CHECK: ('extreloff', 0) +// CHECK: ('nextrel', 0) +// CHECK: ('locreloff', 0) +// CHECK: ('nlocrel', 0) +// CHECK: ('_indirect_symbols', [ +// CHECK: ]) +// CHECK: ), +// CHECK: ]) diff --git a/test/MC/MachO/darwin-x86_64-reloc.s b/test/MC/MachO/darwin-x86_64-reloc.s new file mode 100644 index 000000000000..6b325b085eca --- /dev/null +++ b/test/MC/MachO/darwin-x86_64-reloc.s @@ -0,0 +1,229 @@ +// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s + +// These examples are taken from . 
+ + .text +_foo: + ret + +_baz: + call _foo + call _foo+4 + movq _foo@GOTPCREL(%rip), %rax + pushq _foo@GOTPCREL(%rip) + movl _foo(%rip), %eax + movl _foo+4(%rip), %eax + movb $0x12, _foo(%rip) + movl $0x12345678, _foo(%rip) + .quad _foo +_bar: + .quad _foo+4 + .quad _foo - _bar + .quad _foo - _bar + 4 + .long _foo - _bar + leaq L1(%rip), %rax + leaq L0(%rip), %rax + addl $6,L0(%rip) + addw $500,L0(%rip) + addl $500,L0(%rip) + +_prev: + .space 12,0x90 + .quad L1 +L0: + .quad L0 +L_pc: + .quad _foo - L_pc + .quad _foo - L1 +L1: + .quad L1 - _prev + +// CHECK: ('cputype', 16777223) +// CHECK: ('cpusubtype', 3) +// CHECK: ('filetype', 1) +// CHECK: ('num_load_commands', 1) +// CHECK: ('load_commands_size', 256) +// CHECK: ('flag', 0) +// CHECK: ('reserved', 0) +// CHECK: ('load_commands', [ +// CHECK: # Load Command 0 +// CHECK: (('command', 25) +// CHECK: ('size', 152) +// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('vm_addr', 0) +// CHECK: ('vm_size', 181) +// CHECK: ('file_offset', 288) +// CHECK: ('file_size', 181) +// CHECK: ('maxprot', 7) +// CHECK: ('initprot', 7) +// CHECK: ('num_sections', 1) +// CHECK: ('flags', 0) +// CHECK: ('sections', [ +// CHECK: # Section 0 +// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('address', 0) +// CHECK: ('size', 181) +// CHECK: ('offset', 288) +// CHECK: ('alignment', 0) +// CHECK: ('reloc_offset', 472) +// CHECK: ('num_reloc', 27) +// CHECK: ('flags', 0x80000400) +// CHECK: ('reserved1', 0) +// CHECK: ('reserved2', 0) +// CHECK: ('reserved3', 0) +// CHECK: ), +// CHECK: ('_relocations', [ +// CHECK: # Relocation 0 +// CHECK: (('word-0', 0xa5), +// CHECK: ('word-1', 0x5e000003)), +// CHECK: # Relocation 1 +// CHECK: (('word-0', 0xa5), +// CHECK: ('word-1', 0xe000000)), +// CHECK: # Relocation 2 +// CHECK: (('word-0', 0x9d), +// CHECK: ('word-1', 0x5e000003)), +// CHECK: # Relocation 3 +// CHECK: (('word-0', 0x9d), +// CHECK: ('word-1', 0xe000000)), +// CHECK: # Relocation 4 +// CHECK: (('word-0', 0x95), +// CHECK: ('word-1', 0xe000003)), +// CHECK: # Relocation 5 +// CHECK: (('word-0', 0x8d), +// CHECK: ('word-1', 0xe000003)), +// CHECK: # Relocation 6 +// CHECK: (('word-0', 0x79), +// CHECK: ('word-1', 0x8d000003)), +// CHECK: # Relocation 7 +// CHECK: (('word-0', 0x71), +// CHECK: ('word-1', 0x7d000003)), +// CHECK: # Relocation 8 +// CHECK: (('word-0', 0x69), +// CHECK: ('word-1', 0x6d000003)), +// CHECK: # Relocation 9 +// CHECK: (('word-0', 0x63), +// CHECK: ('word-1', 0x1d000003)), +// CHECK: # Relocation 10 +// CHECK: (('word-0', 0x5c), +// CHECK: ('word-1', 0x1d000003)), +// CHECK: # Relocation 11 +// CHECK: (('word-0', 0x55), +// CHECK: ('word-1', 0x5c000002)), +// CHECK: # Relocation 12 +// CHECK: (('word-0', 0x55), +// CHECK: ('word-1', 0xc000000)), +// CHECK: # Relocation 13 +// CHECK: (('word-0', 0x4d), +// CHECK: ('word-1', 0x5e000002)), +// CHECK: # Relocation 14 +// CHECK: (('word-0', 0x4d), +// CHECK: ('word-1', 0xe000000)), +// CHECK: # Relocation 15 +// CHECK: (('word-0', 0x45), +// CHECK: ('word-1', 0x5e000002)), +// CHECK: # Relocation 16 +// CHECK: (('word-0', 0x45), +// CHECK: ('word-1', 0xe000000)), +// CHECK: # Relocation 17 +// CHECK: (('word-0', 0x3d), +// CHECK: ('word-1', 0xe000000)), +// CHECK: # Relocation 18 +// CHECK: (('word-0', 0x35), +// CHECK: ('word-1', 0xe000000)), +// CHECK: # Relocation 19 +// CHECK: (('word-0', 0x2d), +// CHECK: 
('word-1', 0x8d000000)), +// CHECK: # Relocation 20 +// CHECK: (('word-0', 0x26), +// CHECK: ('word-1', 0x6d000000)), +// CHECK: # Relocation 21 +// CHECK: (('word-0', 0x20), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 22 +// CHECK: (('word-0', 0x1a), +// CHECK: ('word-1', 0x1d000000)), +// CHECK: # Relocation 23 +// CHECK: (('word-0', 0x14), +// CHECK: ('word-1', 0x4d000000)), +// CHECK: # Relocation 24 +// CHECK: (('word-0', 0xe), +// CHECK: ('word-1', 0x3d000000)), +// CHECK: # Relocation 25 +// CHECK: (('word-0', 0x7), +// CHECK: ('word-1', 0x2d000000)), +// CHECK: # Relocation 26 +// CHECK: (('word-0', 0x2), +// CHECK: ('word-1', 0x2d000000)), +// CHECK: ]) +// CHECK: ('_section_data', '\xc3\xe8\x00\x00\x00\x00\xe8\x04\x00\x00\x00H\x8b\x05\x00\x00\x00\x00\xff5\x00\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05\x04\x00\x00\x00\xc6\x05\xff\xff\xff\xff\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x8d\x05,\x00\x00\x00H\x8d\x05\x14\x00\x00\x00\x83\x05\x13\x00\x00\x00\x06f\x81\x05\x12\x00\x00\x00\xf4\x01\x81\x05\x10\x00\x00\x00\xf4\x01\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90,\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\xe4\xff\xff\xff\xff\xff\xff\xff\xd4\xff\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ]) +// CHECK: ), +// CHECK: # Load Command 1 +// CHECK: (('command', 2) +// CHECK: ('size', 24) +// CHECK: ('symoff', 688) +// CHECK: ('nsyms', 4) +// CHECK: ('stroff', 752) +// CHECK: ('strsize', 24) +// CHECK: ('_string_data', '\x00_foo\x00_baz\x00_bar\x00_prev\x00\x00\x00') +// CHECK: ('_symbols', [ +// CHECK: # Symbol 0 +// CHECK: (('n_strx', 1) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 0) +// CHECK: ('_string', '_foo') +// CHECK: ), +// CHECK: # Symbol 1 +// CHECK: (('n_strx', 6) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 1) +// CHECK: ('_string', '_baz') +// CHECK: ), +// CHECK: # Symbol 2 +// CHECK: (('n_strx', 11) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 61) +// CHECK: ('_string', '_bar') +// CHECK: ), +// CHECK: # Symbol 3 +// CHECK: (('n_strx', 16) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 129) +// CHECK: ('_string', '_prev') +// CHECK: ), +// CHECK: ]) +// CHECK: ), +// CHECK: # Load Command 2 +// CHECK: (('command', 11) +// CHECK: ('size', 80) +// CHECK: ('ilocalsym', 0) +// CHECK: ('nlocalsym', 4) +// CHECK: ('iextdefsym', 4) +// CHECK: ('nextdefsym', 0) +// CHECK: ('iundefsym', 4) +// CHECK: ('nundefsym', 0) +// CHECK: ('tocoff', 0) +// CHECK: ('ntoc', 0) +// CHECK: ('modtaboff', 0) +// CHECK: ('nmodtab', 0) +// CHECK: ('extrefsymoff', 0) +// CHECK: ('nextrefsyms', 0) +// CHECK: ('indirectsymoff', 0) +// CHECK: ('nindirectsyms', 0) +// CHECK: ('extreloff', 0) +// CHECK: ('nextrel', 0) +// CHECK: ('locreloff', 0) +// CHECK: ('nlocrel', 0) +// CHECK: ('_indirect_symbols', [ +// CHECK: ]) +// CHECK: ), +// CHECK: ]) diff --git a/test/MC/MachO/reloc.s b/test/MC/MachO/reloc.s index e86ed8c6deb6..c305eeb723c3 100644 --- a/test/MC/MachO/reloc.s +++ b/test/MC/MachO/reloc.s @@ -10,7 +10,7 @@ local_a_ext: local_a: .long 0 -local_a_elt: +local_a_elt: .long 0 local_b: .long local_b - local_c + 245 @@ -27,9 +27,20 @@ local_c: .const .long -bar: +bar: .long 
local_a_elt - bar + 33 +L0: + .long L0 + .long L1 + + .text +_f0: +L1: + jmp L0 + jmp L1 + ret + // CHECK: ('cputype', 7) // CHECK: ('cpusubtype', 3) // CHECK: ('filetype', 1) @@ -42,9 +53,9 @@ bar: // CHECK: ('size', 260) // CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('vm_addr', 0) -// CHECK: ('vm_size', 47) +// CHECK: ('vm_size', 63) // CHECK: ('file_offset', 392) -// CHECK: ('file_size', 47) +// CHECK: ('file_size', 63) // CHECK: ('maxprot', 7) // CHECK: ('initprot', 7) // CHECK: ('num_sections', 3) @@ -54,26 +65,29 @@ bar: // CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) -// CHECK: ('size', 0) +// CHECK: ('size', 8) // CHECK: ('offset', 392) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 0) -// CHECK: ('num_reloc', 0) -// CHECK: ('flags', 0x80000000) +// CHECK: ('reloc_offset', 456) +// CHECK: ('num_reloc', 1) +// CHECK: ('flags', 0x80000400) // CHECK: ('reserved1', 0) // CHECK: ('reserved2', 0) // CHECK: ), // CHECK: ('_relocations', [ +// CHECK: # Relocation 0 +// CHECK: (('word-0', 0x1), +// CHECK: ('word-1', 0x5000003)), // CHECK: ]) -// CHECK: ('_section_data', '') +// CHECK: ('_section_data', '\xe92\x00\x00\x00\xeb\xf9\xc3') // CHECK: # Section 1 // CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') -// CHECK: ('address', 0) +// CHECK: ('address', 8) // CHECK: ('size', 43) -// CHECK: ('offset', 392) +// CHECK: ('offset', 400) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 440) +// CHECK: ('reloc_offset', 464) // CHECK: ('num_reloc', 9) // CHECK: ('flags', 0x0) // CHECK: ('reserved1', 0) @@ -82,72 +96,78 @@ bar: // CHECK: ('_relocations', [ // CHECK: # Relocation 0 // CHECK: (('word-0', 0x8000002a), -// CHECK: ('word-1', 0x10)), +// CHECK: ('word-1', 0x18)), // CHECK: # Relocation 1 // CHECK: (('word-0', 0x90000028), -// CHECK: ('word-1', 0x10)), +// CHECK: ('word-1', 0x18)), // CHECK: # Relocation 2 // CHECK: (('word-0', 0xa0000024), -// CHECK: ('word-1', 0x10)), +// CHECK: ('word-1', 0x18)), // CHECK: # Relocation 3 // CHECK: (('word-0', 0xa0000020), -// CHECK: ('word-1', 0x10)), +// CHECK: ('word-1', 0x18)), // CHECK: # Relocation 4 // CHECK: (('word-0', 0xa4000014), -// CHECK: ('word-1', 0x14)), +// CHECK: ('word-1', 0x1c)), // CHECK: # Relocation 5 // CHECK: (('word-0', 0xa1000000), -// CHECK: ('word-1', 0x1c)), +// CHECK: ('word-1', 0x24)), // CHECK: # Relocation 6 // CHECK: (('word-0', 0x8), // CHECK: ('word-1', 0x4000002)), // CHECK: # Relocation 7 // CHECK: (('word-0', 0x4), -// CHECK: ('word-1', 0xc000006)), +// CHECK: ('word-1', 0xc000007)), // CHECK: # Relocation 8 // CHECK: (('word-0', 0x0), -// CHECK: ('word-1', 0xc000006)), +// CHECK: ('word-1', 0xc000007)), // CHECK: ]) -// CHECK: ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xed\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11\x00\x00\x00\x1a\x00\x00\x00$\x00i') +// CHECK: ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xed\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19\x00\x00\x00"\x00\x00\x00,\x00q') // CHECK: # Section 2 // CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') -// CHECK: ('address', 43) -// 
CHECK: ('size', 4) -// CHECK: ('offset', 435) +// CHECK: ('address', 51) +// CHECK: ('size', 12) +// CHECK: ('offset', 443) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 512) -// CHECK: ('num_reloc', 2) +// CHECK: ('reloc_offset', 536) +// CHECK: ('num_reloc', 4) // CHECK: ('flags', 0x0) // CHECK: ('reserved1', 0) // CHECK: ('reserved2', 0) // CHECK: ), // CHECK: ('_relocations', [ // CHECK: # Relocation 0 -// CHECK: (('word-0', 0xa4000000), -// CHECK: ('word-1', 0x10)), +// CHECK: (('word-0', 0x8), +// CHECK: ('word-1', 0x4000001)), // CHECK: # Relocation 1 +// CHECK: (('word-0', 0x4), +// CHECK: ('word-1', 0x4000003)), +// CHECK: # Relocation 2 +// CHECK: (('word-0', 0xa4000000), +// CHECK: ('word-1', 0x18)), +// CHECK: # Relocation 3 // CHECK: (('word-0', 0xa1000000), -// CHECK: ('word-1', 0x2b)), +// CHECK: ('word-1', 0x33)), // CHECK: ]) -// CHECK: ('_section_data', '\x06\x00\x00\x00') +// CHECK: ('_section_data', '\x06\x00\x00\x007\x00\x00\x00\x00\x00\x00\x00') // CHECK: ]) // CHECK: ), // CHECK: # Load Command 1 // CHECK: (('command', 2) // CHECK: ('size', 24) -// CHECK: ('symoff', 528) -// CHECK: ('nsyms', 7) -// CHECK: ('stroff', 612) -// CHECK: ('strsize', 60) -// CHECK: ('_string_data', '\x00undef\x00local_a_ext\x00local_a\x00local_a_elt\x00local_b\x00local_c\x00bar\x00\x00') +// CHECK: ('symoff', 568) +// CHECK: ('nsyms', 8) +// CHECK: ('stroff', 664) +// CHECK: ('strsize', 64) +// CHECK: ('_string_data', '\x00undef\x00local_a_ext\x00local_a\x00local_a_elt\x00local_b\x00local_c\x00bar\x00_f0\x00\x00') // CHECK: ('_symbols', [ // CHECK: # Symbol 0 // CHECK: (('n_strx', 19) // CHECK: ('n_type', 0xe) // CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 12) +// CHECK: ('n_value', 20) // CHECK: ('_string', 'local_a') // CHECK: ), // CHECK: # Symbol 1 @@ -155,7 +175,7 @@ bar: // CHECK: ('n_type', 0xe) // CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 16) +// CHECK: ('n_value', 24) // CHECK: ('_string', 'local_a_elt') // CHECK: ), // CHECK: # Symbol 2 @@ -163,7 +183,7 @@ bar: // CHECK: ('n_type', 0xe) // CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 20) +// CHECK: ('n_value', 28) // CHECK: ('_string', 'local_b') // CHECK: ), // CHECK: # Symbol 3 @@ -171,7 +191,7 @@ bar: // CHECK: ('n_type', 0xe) // CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 28) +// CHECK: ('n_value', 36) // CHECK: ('_string', 'local_c') // CHECK: ), // CHECK: # Symbol 4 @@ -179,18 +199,26 @@ bar: // CHECK: ('n_type', 0xe) // CHECK: ('n_sect', 3) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 43) +// CHECK: ('n_value', 51) // CHECK: ('_string', 'bar') // CHECK: ), // CHECK: # Symbol 5 +// CHECK: (('n_strx', 59) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 0) +// CHECK: ('_string', '_f0') +// CHECK: ), +// CHECK: # Symbol 6 // CHECK: (('n_strx', 7) // CHECK: ('n_type', 0xf) // CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 8) +// CHECK: ('n_value', 16) // CHECK: ('_string', 'local_a_ext') // CHECK: ), -// CHECK: # Symbol 6 +// CHECK: # Symbol 7 // CHECK: (('n_strx', 1) // CHECK: ('n_type', 0x1) // CHECK: ('n_sect', 0) @@ -204,10 +232,10 @@ bar: // CHECK: (('command', 11) // CHECK: ('size', 80) // CHECK: ('ilocalsym', 0) -// CHECK: ('nlocalsym', 5) -// CHECK: ('iextdefsym', 5) +// CHECK: ('nlocalsym', 6) +// CHECK: ('iextdefsym', 6) // CHECK: ('nextdefsym', 1) -// CHECK: ('iundefsym', 6) +// CHECK: ('iundefsym', 7) // CHECK: ('nundefsym', 1) // CHECK: 
('tocoff', 0) // CHECK: ('ntoc', 0) diff --git a/test/MC/MachO/Darwin/optimal_nop.s b/test/MC/MachO/x86_32-optimal_nop.s similarity index 59% rename from test/MC/MachO/Darwin/optimal_nop.s rename to test/MC/MachO/x86_32-optimal_nop.s index 29cb0738e195..d21d1439e107 100644 --- a/test/MC/MachO/Darwin/optimal_nop.s +++ b/test/MC/MachO/x86_32-optimal_nop.s @@ -1,9 +1,4 @@ -// Validate that we can assemble this file exactly like the platform -// assembler. -// -// RUN: llvm-mc -filetype=obj -triple i386-apple-darwin10 -o %t.mc.o %s -// RUN: as -arch i386 -o %t.as.o %s -// RUN: diff %t.mc.o %t.as.o +// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s # 1 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros @@ -154,3 +149,43 @@ # 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 .align 4, 0x90 ret + +// CHECK: ('cputype', 7) +// CHECK: ('cpusubtype', 3) +// CHECK: ('filetype', 1) +// CHECK: ('num_load_commands', 1) +// CHECK: ('load_commands_size', 124) +// CHECK: ('flag', 0) +// CHECK: ('load_commands', [ +// CHECK: # Load Command 0 +// CHECK: (('command', 1) +// CHECK: ('size', 124) +// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('vm_addr', 0) +// CHECK: ('vm_size', 337) +// CHECK: ('file_offset', 152) +// CHECK: ('file_size', 337) +// CHECK: ('maxprot', 7) +// CHECK: ('initprot', 7) +// CHECK: ('num_sections', 1) +// CHECK: ('flags', 0) +// CHECK: ('sections', [ +// CHECK: # Section 0 +// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('address', 0) +// CHECK: ('size', 337) +// CHECK: ('offset', 152) +// CHECK: ('alignment', 4) +// CHECK: ('reloc_offset', 0) +// CHECK: ('num_reloc', 0) +// CHECK: ('flags', 0x80000400) +// CHECK: ('reserved1', 0) +// CHECK: ('reserved2', 0) +// CHECK: ), +// CHECK: ('_relocations', [ +// CHECK: ]) +// CHECK: ('_section_data', '\xc3\x90\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3f\x90\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\x0f\x1f\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\x0f\x1f@\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3f\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\x0f\x1f\x80\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\xc3\xc3\xc3\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\xc3\xc3f\x0f\x1f\x84\x00\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\xc3\xc3f\x0f\x1f\x84\x00\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\x0f\x1fD\x00\x00f\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3f\x0f\x1fD\x00\x00f\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3f\x0f\x1fD\x00\x00\x0f\x1f\x80\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\x0f\x1f\x80\x00\x00\x00\x00\x0f\x1f\x80\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\x0f\x1f\x80\x00\x00\x00\x00\x0f\x1f\x84\x00\x00\x00\x00\x00\xc3') +// CHECK: ]) +// CHECK: ), +// CHECK: ]) diff --git a/test/TableGen/2003-08-03-PassCode.td b/test/TableGen/2003-08-03-PassCode.td index 71421865a588..c02f499b3822 100644 --- 
a/test/TableGen/2003-08-03-PassCode.td +++ b/test/TableGen/2003-08-03-PassCode.td @@ -1,4 +1,5 @@ // RUN: tblgen %s +// XFAIL: vg_leak class test { code Code = C; diff --git a/test/TableGen/2006-09-18-LargeInt.td b/test/TableGen/2006-09-18-LargeInt.td index afd813fab653..194699acc632 100644 --- a/test/TableGen/2006-09-18-LargeInt.td +++ b/test/TableGen/2006-09-18-LargeInt.td @@ -1,4 +1,5 @@ // RUN: tblgen %s | grep -- 4294901760 +// XFAIL: vg_leak def X { int Y = 0xFFFF0000; diff --git a/test/TableGen/AnonDefinitionOnDemand.td b/test/TableGen/AnonDefinitionOnDemand.td index d567fc807e85..b10ad5870de1 100644 --- a/test/TableGen/AnonDefinitionOnDemand.td +++ b/test/TableGen/AnonDefinitionOnDemand.td @@ -1,4 +1,5 @@ // RUN: tblgen < %s +// XFAIL: vg_leak class foo { int THEVAL = X; } def foo_imp : foo<1>; diff --git a/test/TableGen/DagDefSubst.td b/test/TableGen/DagDefSubst.td index e5eebe99e8c1..92a207f41829 100644 --- a/test/TableGen/DagDefSubst.td +++ b/test/TableGen/DagDefSubst.td @@ -1,5 +1,6 @@ // RUN: tblgen %s | grep {dag d = (X Y)} // RUN: tblgen %s | grep {dag e = (Y X)} +// XFAIL: vg_leak def X; class yclass; diff --git a/test/TableGen/DagIntSubst.td b/test/TableGen/DagIntSubst.td index 3c1291c3eca6..00fde694e7dc 100644 --- a/test/TableGen/DagIntSubst.td +++ b/test/TableGen/DagIntSubst.td @@ -1,4 +1,5 @@ // RUN: tblgen %s | grep {dag d = (X 13)} +// XFAIL: vg_leak def X; class C { diff --git a/test/TableGen/DefmInherit.td b/test/TableGen/DefmInherit.td index 4f37edf056c5..9e1667052697 100644 --- a/test/TableGen/DefmInherit.td +++ b/test/TableGen/DefmInherit.td @@ -1,4 +1,5 @@ // RUN: tblgen %s | grep {zing = 4} | count 4 +// XFAIL: vg_leak class C1 { int bar = A; diff --git a/test/TableGen/ForwardRef.td b/test/TableGen/ForwardRef.td index 2056b1faff35..955cc14248f8 100644 --- a/test/TableGen/ForwardRef.td +++ b/test/TableGen/ForwardRef.td @@ -1,4 +1,5 @@ // RUN: tblgen %s -o - +// XFAIL: vg_leak class bar { list x; diff --git a/test/TableGen/GeneralList.td b/test/TableGen/GeneralList.td index 7f099f286499..ca92a213b228 100644 --- a/test/TableGen/GeneralList.td +++ b/test/TableGen/GeneralList.td @@ -1,4 +1,5 @@ // RUN: tblgen %s +// XFAIL: vg_leak // // Test to make sure that lists work with any data-type diff --git a/test/TableGen/IntBitInit.td b/test/TableGen/IntBitInit.td index b949bfea7b13..16ac9c8f912d 100644 --- a/test/TableGen/IntBitInit.td +++ b/test/TableGen/IntBitInit.td @@ -1,4 +1,5 @@ // RUN: tblgen %s +// XFAIL: vg_leak def { bit A = 1; int B = A; diff --git a/test/TableGen/LazyChange.td b/test/TableGen/LazyChange.td index 145fd0bb79f3..fa53562b8c27 100644 --- a/test/TableGen/LazyChange.td +++ b/test/TableGen/LazyChange.td @@ -1,5 +1,5 @@ // RUN: tblgen %s | grep {int Y = 3} - +// XFAIL: vg_leak class C { int X = 4; diff --git a/test/TableGen/ListArgs.td b/test/TableGen/ListArgs.td index daa0de66bed5..a513db6da3cd 100644 --- a/test/TableGen/ListArgs.td +++ b/test/TableGen/ListArgs.td @@ -1,4 +1,5 @@ // RUN: tblgen %s +// XFAIL: vg_leak class B v> { list vals = v; diff --git a/test/TableGen/ListArgsSimple.td b/test/TableGen/ListArgsSimple.td index b3b207825e8b..f7caed69a996 100644 --- a/test/TableGen/ListArgsSimple.td +++ b/test/TableGen/ListArgsSimple.td @@ -1,4 +1,5 @@ // RUN: tblgen %s +// XFAIL: vg_leak class B { int val = v; diff --git a/test/TableGen/ListConversion.td b/test/TableGen/ListConversion.td index 773ed6e4d114..222b6140564e 100644 --- a/test/TableGen/ListConversion.td +++ b/test/TableGen/ListConversion.td @@ -1,4 +1,5 @@ // RUN: tblgen %s +// XFAIL: 
vg_leak class A; class B : A; diff --git a/test/TableGen/ListSlices.td b/test/TableGen/ListSlices.td index be794cf2174a..5848a4e48704 100644 --- a/test/TableGen/ListSlices.td +++ b/test/TableGen/ListSlices.td @@ -1,4 +1,5 @@ // RUN: tblgen %s +// XFAIL: vg_leak def A { list B = [10, 20, 30, 4, 1, 1231, 20]; diff --git a/test/TableGen/MultiClass.td b/test/TableGen/MultiClass.td index 52ba59c230f1..9f92b73dba65 100644 --- a/test/TableGen/MultiClass.td +++ b/test/TableGen/MultiClass.td @@ -1,4 +1,5 @@ // RUN: tblgen %s | grep {zing = 4} | count 2 +// XFAIL: vg_leak class C1 { int bar = A; diff --git a/test/TableGen/MultiClassDefName.td b/test/TableGen/MultiClassDefName.td index 2e71f7d06169..138c93d9bb0c 100644 --- a/test/TableGen/MultiClassDefName.td +++ b/test/TableGen/MultiClassDefName.td @@ -1,4 +1,5 @@ // RUN: tblgen %s | grep WorldHelloCC | count 1 +// XFAIL: vg_leak class C { string name = n; diff --git a/test/TableGen/MultiClassInherit.td b/test/TableGen/MultiClassInherit.td index d4c4ce58daa0..9da80bad2d74 100644 --- a/test/TableGen/MultiClassInherit.td +++ b/test/TableGen/MultiClassInherit.td @@ -1,4 +1,5 @@ // RUN: tblgen %s | grep {zing = 4} | count 28 +// XFAIL: vg_leak class C1 { int bar = A; diff --git a/test/TableGen/Slice.td b/test/TableGen/Slice.td index cd9c6da15398..22bf7fbfe8cf 100644 --- a/test/TableGen/Slice.td +++ b/test/TableGen/Slice.td @@ -1,5 +1,6 @@ // RUN: tblgen %s | grep {\\\[(set} | count 2 // RUN: tblgen %s | grep {\\\[\\\]} | count 2 +// XFAIL: vg_leak class ValueType { int Size = size; diff --git a/test/TableGen/String.td b/test/TableGen/String.td index d2ae451c295d..fc0f5b8eb546 100644 --- a/test/TableGen/String.td +++ b/test/TableGen/String.td @@ -1,4 +1,5 @@ // RUN: tblgen %s +// XFAIL: vg_leak class x { string y = "missing terminating '\"' character"; } diff --git a/test/TableGen/SuperSubclassSameName.td b/test/TableGen/SuperSubclassSameName.td index 087df87124bf..304c883417fa 100644 --- a/test/TableGen/SuperSubclassSameName.td +++ b/test/TableGen/SuperSubclassSameName.td @@ -1,4 +1,5 @@ // RUN: tblgen < %s +// XFAIL: vg_leak // Test for template arguments that have the same name as superclass template // arguments. diff --git a/test/TableGen/TargetInstrInfo.td b/test/TableGen/TargetInstrInfo.td index 8299541e3c51..2871eb81df9c 100644 --- a/test/TableGen/TargetInstrInfo.td +++ b/test/TableGen/TargetInstrInfo.td @@ -1,6 +1,7 @@ // This test describes how we eventually want to describe instructions in // the target independent code generators. // RUN: tblgen %s +// XFAIL: vg_leak // Target indep stuff. class Instruction { // Would have other stuff eventually diff --git a/test/TableGen/TargetInstrSpec.td b/test/TableGen/TargetInstrSpec.td index 7c3dd579aec9..a7ca9022f848 100644 --- a/test/TableGen/TargetInstrSpec.td +++ b/test/TableGen/TargetInstrSpec.td @@ -1,5 +1,6 @@ // RUN: tblgen %s | grep {\\\[(set VR128:\$dst, (int_x86_sse2_add_pd VR128:\$src1, VR128:\$src2))\\\]} | count 1 // RUN: tblgen %s | grep {\\\[(set VR128:\$dst, (int_x86_sse2_add_ps VR128:\$src1, VR128:\$src2))\\\]} | count 1 +// XFAIL: vg_leak class ValueType { int Size = size; diff --git a/test/TableGen/TemplateArgRename.td b/test/TableGen/TemplateArgRename.td index 535c2e430129..ee5d2cf77525 100644 --- a/test/TableGen/TemplateArgRename.td +++ b/test/TableGen/TemplateArgRename.td @@ -1,4 +1,5 @@ // RUN: tblgen %s +// XFAIL: vg_leak // Make sure there is no collision between XX and XX. 
def S; diff --git a/test/TableGen/Tree.td b/test/TableGen/Tree.td index f9f1f15139d2..2796cfd3586f 100644 --- a/test/TableGen/Tree.td +++ b/test/TableGen/Tree.td @@ -1,5 +1,6 @@ // This tests to make sure we can parse tree patterns. // RUN: tblgen %s +// XFAIL: vg_leak class TreeNode; class RegisterClass; diff --git a/test/TableGen/TreeNames.td b/test/TableGen/TreeNames.td index 05a3298adb7f..ccdeb88dd02a 100644 --- a/test/TableGen/TreeNames.td +++ b/test/TableGen/TreeNames.td @@ -1,5 +1,6 @@ // This tests to make sure we can parse tree patterns with names. // RUN: tblgen %s +// XFAIL: vg_leak class TreeNode; class RegisterClass; diff --git a/test/TableGen/UnsetBitInit.td b/test/TableGen/UnsetBitInit.td index 91342ecb9663..ff7010868bc0 100644 --- a/test/TableGen/UnsetBitInit.td +++ b/test/TableGen/UnsetBitInit.td @@ -1,4 +1,5 @@ // RUN: tblgen %s +// XFAIL: vg_leak class x { field bits<32> A; } diff --git a/test/TableGen/cast.td b/test/TableGen/cast.td index 4a771ae874fc..8164e74eae43 100644 --- a/test/TableGen/cast.td +++ b/test/TableGen/cast.td @@ -1,4 +1,5 @@ // RUN: tblgen %s | grep {add_ps} | count 3 +// XFAIL: vg_leak class ValueType { int Size = size; diff --git a/test/TableGen/eq.td b/test/TableGen/eq.td index 8ba6d7ec8335..518a80ac0d26 100644 --- a/test/TableGen/eq.td +++ b/test/TableGen/eq.td @@ -1,4 +1,5 @@ // RUN: tblgen %s | FileCheck %s +// XFAIL: vg_leak // CHECK: Value = 0 // CHECK: Value = 1 diff --git a/test/TableGen/foreach.td b/test/TableGen/foreach.td index acce4493b518..d4d81f829ed7 100644 --- a/test/TableGen/foreach.td +++ b/test/TableGen/foreach.td @@ -1,6 +1,7 @@ // RUN: tblgen %s | grep {Jr} | count 2 // RUN: tblgen %s | grep {Sr} | count 2 // RUN: tblgen %s | grep {NAME} | count 1 +// XFAIL: vg_leak // Variables for foreach class decls { diff --git a/test/TableGen/if.td b/test/TableGen/if.td index 9b2438245db1..0bac0bac3e98 100644 --- a/test/TableGen/if.td +++ b/test/TableGen/if.td @@ -1,5 +1,6 @@ // RUN: tblgen %s | grep {\\\[1, 2, 3\\\]} | count 4 // RUN: tblgen %s | grep {\\\[4, 5, 6\\\]} | count 2 +// XFAIL: vg_leak class A> vals> { list first = vals[0]; diff --git a/test/TableGen/lisp.td b/test/TableGen/lisp.td index 3e392fda84ff..b521e04c8913 100644 --- a/test/TableGen/lisp.td +++ b/test/TableGen/lisp.td @@ -1,4 +1,5 @@ // RUN: tblgen %s | grep {} +// XFAIL: vg_leak class List n> { list names = n; diff --git a/test/TableGen/nameconcat.td b/test/TableGen/nameconcat.td index fc865f9a464d..fd2880a80dff 100644 --- a/test/TableGen/nameconcat.td +++ b/test/TableGen/nameconcat.td @@ -1,4 +1,5 @@ // RUN: tblgen %s | grep {add_ps} | count 3 +// XFAIL: vg_leak class ValueType { int Size = size; diff --git a/test/TableGen/strconcat.td b/test/TableGen/strconcat.td index fc0d80596c92..38409a99dc4e 100644 --- a/test/TableGen/strconcat.td +++ b/test/TableGen/strconcat.td @@ -1,4 +1,5 @@ // RUN: tblgen %s | grep fufoo +// XFAIL: vg_leak class Y { string T = !strconcat(S, "foo"); diff --git a/test/TableGen/subst.td b/test/TableGen/subst.td index ce9f45d0a481..05d424f68355 100644 --- a/test/TableGen/subst.td +++ b/test/TableGen/subst.td @@ -4,6 +4,7 @@ // RUN: tblgen %s | grep {LAST} | count 1 // RUN: tblgen %s | grep {TVAR} | count 2 // RUN: tblgen %s | grep {Bogus} | count 1 +// XFAIL: vg_leak class Honorific { string honorific = t; diff --git a/test/TableGen/subst2.td b/test/TableGen/subst2.td index 3366c9d9cf7f..584266ef2335 100644 --- a/test/TableGen/subst2.td +++ b/test/TableGen/subst2.td @@ -1,4 +1,5 @@ // RUN: tblgen %s | FileCheck %s +// XFAIL: vg_leak // CHECK: No 
subst // CHECK: No foo // CHECK: RECURSE foo diff --git a/test/lit.cfg b/test/lit.cfg index 929871a1d225..fd3120a29fe9 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -128,6 +128,10 @@ excludes = [] # Provide target_triple for use in XFAIL and XTARGET. config.target_triple = site_exp['target_triplet'] +# When running under valgrind, we mangle '-vg' or '-vg_leak' onto the end of the +# triple so we can check it with XFAIL and XTARGET. +config.target_triple += lit.valgrindTriple + # Provide llvm_supports_target for use in local configs. targets = set(site_exp["TARGETS_TO_BUILD"].split()) def llvm_supports_target(name): diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp index abf5d8ef7211..813c96cc795d 100644 --- a/tools/bugpoint/BugDriver.cpp +++ b/tools/bugpoint/BugDriver.cpp @@ -68,12 +68,12 @@ std::string llvm::getPassesString(const std::vector &Passes) { } BugDriver::BugDriver(const char *toolname, bool as_child, bool find_bugs, - unsigned timeout, unsigned memlimit, + unsigned timeout, unsigned memlimit, bool use_valgrind, LLVMContext& ctxt) : Context(ctxt), ToolName(toolname), ReferenceOutputFile(OutputFile), Program(0), Interpreter(0), SafeInterpreter(0), gcc(0), run_as_child(as_child), run_find_bugs(find_bugs), Timeout(timeout), - MemoryLimit(memlimit) {} + MemoryLimit(memlimit), UseValgrind(use_valgrind) {} /// ParseInputFile - Given a bitcode or assembly input filename, parse and diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h index db35c851d9a4..0a10a61c2159 100644 --- a/tools/bugpoint/BugDriver.h +++ b/tools/bugpoint/BugDriver.h @@ -55,6 +55,7 @@ class BugDriver { bool run_find_bugs; unsigned Timeout; unsigned MemoryLimit; + bool UseValgrind; // FIXME: sort out public/private distinctions... friend class ReducePassList; @@ -62,7 +63,8 @@ class BugDriver { public: BugDriver(const char *toolname, bool as_child, bool find_bugs, - unsigned timeout, unsigned memlimit, LLVMContext& ctxt); + unsigned timeout, unsigned memlimit, bool use_valgrind, + LLVMContext& ctxt); const char *getToolName() const { return ToolName; } diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp index 9f712e097a74..3a6149b24a52 100644 --- a/tools/bugpoint/OptimizerDriver.cpp +++ b/tools/bugpoint/OptimizerDriver.cpp @@ -45,8 +45,6 @@ namespace { // ChildOutput - This option captures the name of the child output file that // is set up by the parent bugpoint process cl::opt ChildOutput("child-output", cl::ReallyHidden); - cl::opt UseValgrind("enable-valgrind", - cl::desc("Run optimizations through valgrind")); } /// writeProgramToFile - This writes the current "Program" to the named bitcode diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp index 565f3f9a7087..e14f31e67d85 100644 --- a/tools/bugpoint/bugpoint.cpp +++ b/tools/bugpoint/bugpoint.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/StandardPasses.h" #include "llvm/System/Process.h" #include "llvm/System/Signals.h" +#include "llvm/System/Valgrind.h" #include "llvm/LinkAllVMCore.h" using namespace llvm; @@ -48,9 +49,14 @@ TimeoutValue("timeout", cl::init(300), cl::value_desc("seconds"), cl::desc("Number of seconds program is allowed to run before it " "is killed (default is 300s), 0 disables timeout")); -static cl::opt -MemoryLimit("mlimit", cl::init(100), cl::value_desc("MBytes"), - cl::desc("Maximum amount of memory to use. 
0 disables check.")); +static cl::opt +MemoryLimit("mlimit", cl::init(-1), cl::value_desc("MBytes"), + cl::desc("Maximum amount of memory to use. 0 disables check." + " Defaults to 100MB (800MB under valgrind).")); + +static cl::opt +UseValgrind("enable-valgrind", + cl::desc("Run optimizations through valgrind")); // The AnalysesList is automatically populated with registered Passes by the // PassNameParser. @@ -108,7 +114,17 @@ int main(int argc, char **argv) { outs() << "Override triple set to '" << OverrideTriple << "'\n"; } - BugDriver D(argv[0], AsChild, FindBugs, TimeoutValue, MemoryLimit, Context); + if (MemoryLimit < 0) { + // Set the default MemoryLimit. Be sure to update the flag's description if + // you change this. + if (sys::RunningOnValgrind() || UseValgrind) + MemoryLimit = 800; + else + MemoryLimit = 100; + } + + BugDriver D(argv[0], AsChild, FindBugs, TimeoutValue, MemoryLimit, + UseValgrind, Context); if (D.addSources(InputFilenames)) return 1; AddToDriver PM(D); diff --git a/tools/edis/Makefile b/tools/edis/Makefile index a3c587966878..cd8f4b067ae8 100644 --- a/tools/edis/Makefile +++ b/tools/edis/Makefile @@ -39,11 +39,12 @@ ifeq ($(HOST_OS),Darwin) -Wl,-seg1addr -Wl,0xE0000000 # Mac OS X 10.4 and earlier tools do not allow a second -install_name on command line + # Path is /Developer/usr/local/lib for now; will use an rpath-based mechanism soon DARWIN_VERS := $(shell echo $(TARGET_TRIPLE) | sed 's/.*darwin\([0-9]*\).*/\1/') ifneq ($(DARWIN_VERS),8) LLVMLibsOptions := $(LLVMLibsOptions) \ -no-undefined -Wl,-install_name \ - -Wl,"@executable_path/../lib/lib$(LIBRARYNAME)$(SHLIBEXT)" + -Wl,"/Developer/usr/local/lib/lib$(LIBRARYNAME)$(SHLIBEXT)" endif endif diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp index dbfe7a5d06f0..0caf539ac959 100644 --- a/tools/llvm-mc/Disassembler.cpp +++ b/tools/llvm-mc/Disassembler.cpp @@ -47,8 +47,8 @@ public: }; } -static bool PrintInsts(const llvm::MCDisassembler &DisAsm, - llvm::MCInstPrinter &Printer, const ByteArrayTy &Bytes, +static bool PrintInsts(const MCDisassembler &DisAsm, + MCInstPrinter &Printer, const ByteArrayTy &Bytes, SourceMgr &SM) { // Wrap the vector in a MemoryObject. VectorMemoryObject memoryObject(Bytes); @@ -77,24 +77,23 @@ static bool PrintInsts(const llvm::MCDisassembler &DisAsm, } int Disassembler::disassemble(const Target &T, const std::string &Triple, - MemoryBuffer &Buffer) { + MemoryBuffer &Buffer) { // Set up disassembler. 
- llvm::OwningPtr AsmInfo(T.createAsmInfo(Triple)); + OwningPtr AsmInfo(T.createAsmInfo(Triple)); if (!AsmInfo) { errs() << "error: no assembly info for target " << Triple << "\n"; return -1; } - llvm::OwningPtr DisAsm(T.createMCDisassembler()); + OwningPtr DisAsm(T.createMCDisassembler()); if (!DisAsm) { errs() << "error: no disassembler for target " << Triple << "\n"; return -1; } - llvm::MCInstPrinter *InstPrinter = T.createMCInstPrinter(0, *AsmInfo, outs()); - - if (!InstPrinter) { + OwningPtr IP(T.createMCInstPrinter(0, *AsmInfo, outs())); + if (!IP) { errs() << "error: no instruction printer for target " << Triple << '\n'; return -1; } @@ -151,7 +150,7 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple, } if (!ByteArray.empty()) - ErrorOccurred |= PrintInsts(*DisAsm, *InstPrinter, ByteArray, SM); + ErrorOccurred |= PrintInsts(*DisAsm, *IP, ByteArray, SM); return ErrorOccurred; } diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index 66e126092db2..3c23990af6bf 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -152,7 +152,7 @@ static int AsLexInput(const char *ProgName) { if (!TheTarget) return 1; - const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName); + llvm::OwningPtr MAI((MCAsmInfo*) TheTarget->createAsmInfo(TripleName)); assert(MAI && "Unable to create target asm info!"); AsmLexer Lexer(*MAI); @@ -260,7 +260,7 @@ static int AssembleInput(const char *ProgName) { SrcMgr.setIncludeDirs(IncludeDirs); - const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName); + llvm::OwningPtr MAI((MCAsmInfo*) TheTarget->createAsmInfo(TripleName)); assert(MAI && "Unable to create target asm info!"); MCContext Ctx(*MAI); @@ -278,18 +278,17 @@ static int AssembleInput(const char *ProgName) { return 1; } - OwningPtr IP; OwningPtr CE; OwningPtr Str; OwningPtr TAB; if (FileType == OFT_AssemblyFile) { - IP.reset(TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI, *Out)); + MCInstPrinter *IP = + TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI, *Out); if (ShowEncoding) CE.reset(TheTarget->createCodeEmitter(*TM, Ctx)); Str.reset(createAsmStreamer(Ctx, *Out,TM->getTargetData()->isLittleEndian(), - /*asmverbose*/true, IP.get(), CE.get(), - ShowInst)); + /*asmverbose*/true, IP, CE.get(), ShowInst)); } else { assert(FileType == OFT_ObjectFile && "Invalid file type!"); CE.reset(TheTarget->createCodeEmitter(*TM, Ctx)); @@ -319,13 +318,9 @@ static int AssembleInput(const char *ProgName) { } static int DisassembleInput(const char *ProgName) { - std::string Error; - const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); - if (TheTarget == 0) { - errs() << ProgName << ": error: unable to get target for '" << TripleName - << "', see --version and --triple.\n"; + const Target *TheTarget = GetTarget(ProgName); + if (!TheTarget) return 0; - } std::string ErrorMessage; diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp index 571996013e36..964b04da473c 100644 --- a/unittests/ADT/APFloatTest.cpp +++ b/unittests/ADT/APFloatTest.cpp @@ -374,6 +374,7 @@ TEST(APFloatTest, makeNaN) { } #ifdef GTEST_HAS_DEATH_TEST +#ifndef NDEBUG TEST(APFloatTest, SemanticsDeath) { EXPECT_DEATH(APFloat(APFloat::IEEEsingle, 0.0f).convertToDouble(), "Float semantics are not IEEEdouble"); EXPECT_DEATH(APFloat(APFloat::IEEEdouble, 0.0 ).convertToFloat(), "Float semantics are not IEEEsingle"); @@ -573,5 +574,6 @@ TEST(APFloatTest, StringHexadecimalExponentDeath) { EXPECT_DEATH(APFloat(APFloat::IEEEdouble, "-0x1.1p-"), "Exponent 
has no digits"); } #endif +#endif } diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp index 0b13aa402ea1..d08e86abaa75 100644 --- a/unittests/ADT/APIntTest.cpp +++ b/unittests/ADT/APIntTest.cpp @@ -328,6 +328,7 @@ TEST(APIntTest, Log2) { } #ifdef GTEST_HAS_DEATH_TEST +#ifndef NDEBUG TEST(APIntTest, StringDeath) { EXPECT_DEATH(APInt(0, "", 0), "Bitwidth too small"); EXPECT_DEATH(APInt(32, "", 0), "Invalid string length"); @@ -340,5 +341,6 @@ TEST(APIntTest, StringDeath) { EXPECT_DEATH(APInt(32, "1L", 10), "Invalid character in digit string"); } #endif +#endif } diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp index 8a817966cb8f..d7dc3af52feb 100644 --- a/unittests/ADT/SmallVectorTest.cpp +++ b/unittests/ADT/SmallVectorTest.cpp @@ -384,7 +384,7 @@ TEST_F(SmallVectorTest, ConstVectorTest) { // Direct array access. TEST_F(SmallVectorTest, DirectVectorTest) { EXPECT_EQ(0u, theVector.size()); - EXPECT_EQ(4u, theVector.capacity()); + EXPECT_LE(4u, theVector.capacity()); EXPECT_EQ(0, Constructable::getNumConstructorCalls()); theVector.end()[0] = 1; theVector.end()[1] = 2; diff --git a/unittests/Support/LeakDetectorTest.cpp b/unittests/Support/LeakDetectorTest.cpp index 85ef04676dcd..d198c7a8bda0 100644 --- a/unittests/Support/LeakDetectorTest.cpp +++ b/unittests/Support/LeakDetectorTest.cpp @@ -15,6 +15,7 @@ using namespace llvm; namespace { #ifdef GTEST_HAS_DEATH_TEST +#ifndef NDEBUG TEST(LeakDetector, Death1) { LeakDetector::addGarbageObject((void*) 1); LeakDetector::addGarbageObject((void*) 2); @@ -25,5 +26,6 @@ TEST(LeakDetector, Death1) { "Cache != o && \"Object already in set!\""); } #endif +#endif } diff --git a/unittests/VMCore/InstructionsTest.cpp b/unittests/VMCore/InstructionsTest.cpp index 2d98cad27fb8..c1baa74487aa 100644 --- a/unittests/VMCore/InstructionsTest.cpp +++ b/unittests/VMCore/InstructionsTest.cpp @@ -8,8 +8,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Instructions.h" +#include "llvm/BasicBlock.h" #include "llvm/DerivedTypes.h" #include "llvm/LLVMContext.h" +#include "llvm/ADT/STLExtras.h" #include "gtest/gtest.h" namespace llvm { @@ -20,11 +22,13 @@ TEST(InstructionsTest, ReturnInst) { // test for PR6589 const ReturnInst* r0 = ReturnInst::Create(C); + EXPECT_EQ(r0->getNumOperands(), 0U); EXPECT_EQ(r0->op_begin(), r0->op_end()); const IntegerType* Int1 = IntegerType::get(C, 1); Constant* One = ConstantInt::get(Int1, 1, true); const ReturnInst* r1 = ReturnInst::Create(C, One); + EXPECT_EQ(r1->getNumOperands(), 1U); User::const_op_iterator b(r1->op_begin()); EXPECT_NE(b, r1->op_end()); EXPECT_EQ(*b, One); @@ -37,5 +41,88 @@ TEST(InstructionsTest, ReturnInst) { delete r1; } +TEST(InstructionsTest, BranchInst) { + LLVMContext &C(getGlobalContext()); + + // Make a BasicBlocks + BasicBlock* bb0 = BasicBlock::Create(C); + BasicBlock* bb1 = BasicBlock::Create(C); + + // Mandatory BranchInst + const BranchInst* b0 = BranchInst::Create(bb0); + + EXPECT_TRUE(b0->isUnconditional()); + EXPECT_FALSE(b0->isConditional()); + EXPECT_EQ(b0->getNumSuccessors(), 1U); + + // check num operands + EXPECT_EQ(b0->getNumOperands(), 1U); + + EXPECT_NE(b0->op_begin(), b0->op_end()); + EXPECT_EQ(next(b0->op_begin()), b0->op_end()); + + EXPECT_EQ(next(b0->op_begin()), b0->op_end()); + + const IntegerType* Int1 = IntegerType::get(C, 1); + Constant* One = ConstantInt::get(Int1, 1, true); + + // Conditional BranchInst + BranchInst* b1 = BranchInst::Create(bb0, bb1, One); + + 
EXPECT_FALSE(b1->isUnconditional()); + EXPECT_TRUE(b1->isConditional()); + EXPECT_EQ(b1->getNumSuccessors(), 2U); + + // check num operands + EXPECT_EQ(b1->getNumOperands(), 3U); + + User::const_op_iterator b(b1->op_begin()); + + // check COND + EXPECT_NE(b, b1->op_end()); + EXPECT_EQ(*b, One); + EXPECT_EQ(b1->getOperand(0), One); + EXPECT_EQ(b1->getCondition(), One); + ++b; + + // check ELSE + EXPECT_EQ(*b, bb1); + EXPECT_EQ(b1->getOperand(1), bb1); + EXPECT_EQ(b1->getSuccessor(1), bb1); + ++b; + + // check THEN + EXPECT_EQ(*b, bb0); + EXPECT_EQ(b1->getOperand(2), bb0); + EXPECT_EQ(b1->getSuccessor(0), bb0); + ++b; + + EXPECT_EQ(b, b1->op_end()); + + // shrink it + b1->setUnconditionalDest(bb1); + + // check num operands + EXPECT_EQ(b1->getNumOperands(), 1U); + + User::const_op_iterator c(b1->op_begin()); + EXPECT_NE(c, b1->op_end()); + + // check THEN + EXPECT_EQ(*c, bb1); + EXPECT_EQ(b1->getOperand(0), bb1); + EXPECT_EQ(b1->getSuccessor(0), bb1); + ++c; + + EXPECT_EQ(c, b1->op_end()); + + // clean up + delete b0; + delete b1; + + delete bb0; + delete bb1; +} + } // end anonymous namespace } // end namespace llvm diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp index 3c4742cc36fb..c6a139275899 100644 --- a/utils/FileCheck/FileCheck.cpp +++ b/utils/FileCheck/FileCheck.cpp @@ -401,11 +401,12 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, } } - if (Best != StringRef::npos && BestQuality < 50) { - // Print the "possible intended match here" line if we found something - // reasonable. - SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), - "possible intended match here", "note"); + // Print the "possible intended match here" line if we found something + // reasonable and not equal to what we showed in the "scanning from here" + // line. + if (Best && Best != StringRef::npos && BestQuality < 50) { + SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), + "possible intended match here", "note"); // FIXME: If we wanted to be really friendly we would show why the match // failed, as it can be hard to spot simple one character differences. diff --git a/utils/NewNightlyTest.pl b/utils/NewNightlyTest.pl index a3063824f8fb..4287cc1da543 100755 --- a/utils/NewNightlyTest.pl +++ b/utils/NewNightlyTest.pl @@ -24,6 +24,7 @@ use Socket; # IMPLEMENTED. # -nickname NAME The NAME argument specifieds the nickname this script # will submit to the nightlytest results repository. +# -nouname Don't include uname data (machine will be identified by nickname only). # -submit-server Specifies a server to submit the test results too. If this # option is not specified it defaults to # llvm.org. This is basically just the address of the @@ -220,6 +221,7 @@ while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) { $LLVMGCCPATH = $ARGV[0] . '/bin'; shift; next;} if (/^-noexternals$/) { $NOEXTERNALS = 1; next; } + if (/^-nouname$/) { $NOUNAME = 1; next; } if (/^-use-gmake/) { $MAKECMD = "gmake"; shift; next; } if (/^-extraflags/) { $CONFIGUREARGS .= " --with-extra-options=\'$ARGV[0]\'"; shift; next;} @@ -693,12 +695,21 @@ $endtime = `date "+20%y-%m-%d %H:%M:%S"`; if ( $VERBOSE ) { print "PREPARING LOGS TO BE SENT TO SERVER\n"; } -$machine_data = "uname: ".`uname -a`. - "hardware: ".`uname -m`. - "os: ".`uname -sr`. - "name: ".`uname -n`. - "date: ".`date \"+20%y-%m-%d\"`. - "time: ".`date +\"%H:%M:%S\"`; +if ( ! $NOUNAME ) { + $machine_data = "uname: ".`uname -a`. + "hardware: ".`uname -m`. + "os: ".`uname -sr`. + "name: ".`uname -n`. 
+ "date: ".`date \"+20%y-%m-%d\"`. + "time: ".`date +\"%H:%M:%S\"`; +} else { + $machine_data = "uname: (excluded)\n". + "hardware: ".`uname -m`. + "os: ".`uname -sr`. + "name: $nickname\n". + "date: ".`date \"+20%y-%m-%d\"`. + "time: ".`date +\"%H:%M:%S\"`; +} # Get gcc version. my $gcc_version_long = ""; diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index b823e57b3779..e5c068bcdf63 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -844,19 +844,20 @@ void AsmMatcherInfo::BuildInfo(CodeGenTarget &Target) { // Parse the instructions; we need to do this first so that we can gather the // singleton register classes. std::set SingletonRegisterNames; - for (std::map::const_iterator - it = Target.getInstructions().begin(), - ie = Target.getInstructions().end(); - it != ie; ++it) { - const CodeGenInstruction &CGI = it->second; + + const std::vector &InstrList = + Target.getInstructionsByEnumValue(); + + for (unsigned i = 0, e = InstrList.size(); i != e; ++i) { + const CodeGenInstruction &CGI = *InstrList[i]; - if (!StringRef(it->first).startswith(MatchPrefix)) + if (!StringRef(CGI.TheDef->getName()).startswith(MatchPrefix)) continue; - OwningPtr II(new InstructionInfo); + OwningPtr II(new InstructionInfo()); - II->InstrName = it->first; - II->Instr = &it->second; + II->InstrName = CGI.TheDef->getName(); + II->Instr = &CGI; II->AsmString = FlattenVariants(CGI.AsmString, 0); // Remove comments from the asm string. @@ -869,7 +870,7 @@ void AsmMatcherInfo::BuildInfo(CodeGenTarget &Target) { TokenizeAsmString(II->AsmString, II->Tokens); // Ignore instructions which shouldn't be matched. - if (!IsAssemblerInstruction(it->first, CGI, II->Tokens)) + if (!IsAssemblerInstruction(CGI.TheDef->getName(), CGI, II->Tokens)) continue; // Collect singleton registers, if used. @@ -998,7 +999,7 @@ static void EmitConvertToMCInst(CodeGenTarget &Target, // Start the unified conversion function. - CvtOS << "static bool ConvertToMCInst(ConversionKind Kind, MCInst &Inst, " + CvtOS << "static void ConvertToMCInst(ConversionKind Kind, MCInst &Inst, " << "unsigned Opcode,\n" << " const SmallVectorImpl &Operands) {\n"; @@ -1155,13 +1156,12 @@ static void EmitConvertToMCInst(CodeGenTarget &Target, } } - CvtOS << " break;\n"; + CvtOS << " return;\n"; } // Finish the convert function. CvtOS << " }\n"; - CvtOS << " return false;\n"; CvtOS << "}\n\n"; // Finish the enum, and drop the convert function after it. @@ -1634,8 +1634,15 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " continue;\n"; } OS << "\n"; - OS << " return ConvertToMCInst(it->ConvertFn, Inst, " - << "it->Opcode, Operands);\n"; + OS << " ConvertToMCInst(it->ConvertFn, Inst, it->Opcode, Operands);\n"; + + // Call the post-processing function, if used. 
+ std::string InsnCleanupFn = + AsmParser->getValueAsString("AsmParserInstCleanup"); + if (!InsnCleanupFn.empty()) + OS << " " << InsnCleanupFn << "(Inst);\n"; + + OS << " return false;\n"; OS << " }\n\n"; OS << " return true;\n"; diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp index 3a38dd450eee..ab1e239a95f2 100644 --- a/utils/TableGen/AsmWriterEmitter.cpp +++ b/utils/TableGen/AsmWriterEmitter.cpp @@ -254,16 +254,16 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) { for (CodeGenTarget::inst_iterator I = Target.inst_begin(), E = Target.inst_end(); I != E; ++I) - if (!I->second.AsmString.empty() && - I->second.TheDef->getName() != "PHI") + if (!(*I)->AsmString.empty() && + (*I)->TheDef->getName() != "PHI") Instructions.push_back( - AsmWriterInst(I->second, + AsmWriterInst(**I, AsmWriter->getValueAsInt("Variant"), AsmWriter->getValueAsInt("FirstOperandColumn"), AsmWriter->getValueAsInt("OperandSpacing"))); // Get the instruction numbering. - Target.getInstructionsByEnumValue(NumberedInstructions); + NumberedInstructions = Target.getInstructionsByEnumValue(); // Compute the CodeGenInstruction -> AsmWriterInst mapping. Note that not // all machine instructions are necessarily being printed, so there may be @@ -499,8 +499,8 @@ void AsmWriterEmitter::EmitGetInstructionName(raw_ostream &O) { Record *AsmWriter = Target.getAsmWriter(); std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName"); - std::vector NumberedInstructions; - Target.getInstructionsByEnumValue(NumberedInstructions); + const std::vector &NumberedInstructions = + Target.getInstructionsByEnumValue(); StringToOffsetTable StringTable; O << diff --git a/utils/TableGen/ClangDiagnosticsEmitter.cpp b/utils/TableGen/ClangDiagnosticsEmitter.cpp index 6f1080eb5eb2..27b16544ce1d 100644 --- a/utils/TableGen/ClangDiagnosticsEmitter.cpp +++ b/utils/TableGen/ClangDiagnosticsEmitter.cpp @@ -34,7 +34,7 @@ void ClangDiagsDefsEmitter::run(raw_ostream &OS) { OS << "__" << ComponentName << "START = DIAG_START_" << ComponentName << ",\n"; OS << "#undef " << ComponentName << "START\n"; - OS << "#endif\n"; + OS << "#endif\n\n"; } const std::vector &Diags = diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp index f1857f506e62..641c22433a19 100644 --- a/utils/TableGen/CodeEmitterGen.cpp +++ b/utils/TableGen/CodeEmitterGen.cpp @@ -86,8 +86,8 @@ void CodeEmitterGen::run(raw_ostream &o) { EmitSourceFileHeader("Machine Code Emitter", o); std::string Namespace = Insts[0]->getValueAsString("Namespace") + "::"; - std::vector NumberedInstructions; - Target.getInstructionsByEnumValue(NumberedInstructions); + const std::vector &NumberedInstructions = + Target.getInstructionsByEnumValue(); // Emit function declaration o << "unsigned " << Target.getName() << "CodeEmitter::" @@ -95,7 +95,7 @@ void CodeEmitterGen::run(raw_ostream &o) { // Emit instruction base values o << " static const unsigned InstBits[] = {\n"; - for (std::vector::iterator + for (std::vector::const_iterator IN = NumberedInstructions.begin(), EN = NumberedInstructions.end(); IN != EN; ++IN) { @@ -156,7 +156,7 @@ void CodeEmitterGen::run(raw_ostream &o) { BitsInit *BI = R->getValueAsBitsInit("Inst"); const std::vector &Vals = R->getValues(); - CodeGenInstruction &CGI = Target.getInstruction(InstName); + CodeGenInstruction &CGI = Target.getInstruction(R); // Loop over all of the fields in the instruction, determining which are the // operands to the instruction. 
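The AsmMatcherEmitter, AsmWriterEmitter, and CodeEmitterGen hunks above all migrate to the same CodeGenTarget API: instead of walking the name-keyed getInstructions() map or filling an out-parameter vector, they take the enum-ordered instruction list returned by reference from getInstructionsByEnumValue(), and they look an instruction up by its defining Record via getInstruction(R). The following is a minimal consumer-side sketch of that idiom, not part of the patch; the container's template arguments are missing from this copy of the diff, so the const CodeGenInstruction* element type is inferred from how the loops above dereference the entries rather than quoted from the change.

// Sketch only: iterate instructions in enum order via the new accessor.
// Element type assumed to be const CodeGenInstruction* (see note above).
const std::vector<const CodeGenInstruction*> &NumberedInstructions =
    Target.getInstructionsByEnumValue();

for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
  const CodeGenInstruction &CGI = *NumberedInstructions[i];
  // Instructions are identified by their Record, not by name, so string
  // lookups like getInstruction(InstName) become getInstruction(CGI.TheDef).
  if (CGI.TheDef->getName() == "PHI")
    continue;   // same filtering the AsmWriter emitter applies above
  // ... per-instruction processing ...
}

diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp (continued below)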
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index 6e894a46bce4..4cc9b79a291e 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -25,19 +25,18 @@ using namespace llvm; // EEVT::TypeSet Implementation //===----------------------------------------------------------------------===// -// FIXME: Remove EEVT::isUnknown! - static inline bool isInteger(MVT::SimpleValueType VT) { return EVT(VT).isInteger(); } - static inline bool isFloatingPoint(MVT::SimpleValueType VT) { return EVT(VT).isFloatingPoint(); } - static inline bool isVector(MVT::SimpleValueType VT) { return EVT(VT).isVector(); } +static inline bool isScalar(MVT::SimpleValueType VT) { + return !EVT(VT).isVector(); +} EEVT::TypeSet::TypeSet(MVT::SimpleValueType VT, TreePattern &TP) { if (VT == MVT::iAny) @@ -67,6 +66,32 @@ EEVT::TypeSet::TypeSet(const std::vector &VTList) { TypeVec.erase(std::unique(TypeVec.begin(), TypeVec.end()), TypeVec.end()); } +/// FillWithPossibleTypes - Set to all legal types and return true, only valid +/// on completely unknown type sets. +bool EEVT::TypeSet::FillWithPossibleTypes(TreePattern &TP, + bool (*Pred)(MVT::SimpleValueType), + const char *PredicateName) { + assert(isCompletelyUnknown()); + const std::vector &LegalTypes = + TP.getDAGPatterns().getTargetInfo().getLegalValueTypes(); + + for (unsigned i = 0, e = LegalTypes.size(); i != e; ++i) + if (Pred == 0 || Pred(LegalTypes[i])) + TypeVec.push_back(LegalTypes[i]); + + // If we have nothing that matches the predicate, bail out. + if (TypeVec.empty()) + TP.error("Type inference contradiction found, no " + + std::string(PredicateName) + " types found"); + // No need to sort with one element. + if (TypeVec.size() == 1) return true; + + // Remove duplicates. + array_pod_sort(TypeVec.begin(), TypeVec.end()); + TypeVec.erase(std::unique(TypeVec.begin(), TypeVec.end()), TypeVec.end()); + + return true; +} /// hasIntegerTypes - Return true if this TypeSet contains iAny or an /// integer value type. @@ -97,7 +122,7 @@ bool EEVT::TypeSet::hasVectorTypes() const { std::string EEVT::TypeSet::getName() const { - if (TypeVec.empty()) return "isUnknown"; + if (TypeVec.empty()) return ""; std::string Result; @@ -200,94 +225,84 @@ bool EEVT::TypeSet::MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP){ /// EnforceInteger - Remove all non-integer types from this set. bool EEVT::TypeSet::EnforceInteger(TreePattern &TP) { - TypeSet InputSet(*this); - bool MadeChange = false; - // If we know nothing, then get the full set. - if (TypeVec.empty()) { - *this = TP.getDAGPatterns().getTargetInfo().getLegalValueTypes(); - MadeChange = true; - } - + if (TypeVec.empty()) + return FillWithPossibleTypes(TP, isInteger, "integer"); if (!hasFloatingPointTypes()) - return MadeChange; + return false; + + TypeSet InputSet(*this); // Filter out all the fp types. for (unsigned i = 0; i != TypeVec.size(); ++i) - if (isFloatingPoint(TypeVec[i])) + if (!isInteger(TypeVec[i])) TypeVec.erase(TypeVec.begin()+i--); if (TypeVec.empty()) TP.error("Type inference contradiction found, '" + InputSet.getName() + "' needs to be integer"); - return MadeChange; + return true; } /// EnforceFloatingPoint - Remove all integer types from this set. bool EEVT::TypeSet::EnforceFloatingPoint(TreePattern &TP) { - TypeSet InputSet(*this); - bool MadeChange = false; - // If we know nothing, then get the full set. 
- if (TypeVec.empty()) { - *this = TP.getDAGPatterns().getTargetInfo().getLegalValueTypes(); - MadeChange = true; - } - + if (TypeVec.empty()) + return FillWithPossibleTypes(TP, isFloatingPoint, "floating point"); + if (!hasIntegerTypes()) - return MadeChange; + return false; + + TypeSet InputSet(*this); // Filter out all the fp types. for (unsigned i = 0; i != TypeVec.size(); ++i) - if (isInteger(TypeVec[i])) + if (!isFloatingPoint(TypeVec[i])) TypeVec.erase(TypeVec.begin()+i--); if (TypeVec.empty()) TP.error("Type inference contradiction found, '" + InputSet.getName() + "' needs to be floating point"); - return MadeChange; + return true; } /// EnforceScalar - Remove all vector types from this. bool EEVT::TypeSet::EnforceScalar(TreePattern &TP) { - TypeSet InputSet(*this); - bool MadeChange = false; - // If we know nothing, then get the full set. - if (TypeVec.empty()) { - *this = TP.getDAGPatterns().getTargetInfo().getLegalValueTypes(); - MadeChange = true; - } - + if (TypeVec.empty()) + return FillWithPossibleTypes(TP, isScalar, "scalar"); + if (!hasVectorTypes()) - return MadeChange; + return false; + + TypeSet InputSet(*this); // Filter out all the vector types. for (unsigned i = 0; i != TypeVec.size(); ++i) - if (isVector(TypeVec[i])) + if (!isScalar(TypeVec[i])) TypeVec.erase(TypeVec.begin()+i--); if (TypeVec.empty()) TP.error("Type inference contradiction found, '" + InputSet.getName() + "' needs to be scalar"); - return MadeChange; + return true; } /// EnforceVector - Remove all vector types from this. bool EEVT::TypeSet::EnforceVector(TreePattern &TP) { + // If we know nothing, then get the full set. + if (TypeVec.empty()) + return FillWithPossibleTypes(TP, isVector, "vector"); + TypeSet InputSet(*this); bool MadeChange = false; - // If we know nothing, then get the full set. - if (TypeVec.empty()) { - *this = TP.getDAGPatterns().getTargetInfo().getLegalValueTypes(); - MadeChange = true; - } - // Filter out all the scalar types. for (unsigned i = 0; i != TypeVec.size(); ++i) - if (!isVector(TypeVec[i])) + if (!isVector(TypeVec[i])) { TypeVec.erase(TypeVec.begin()+i--); + MadeChange = true; + } if (TypeVec.empty()) TP.error("Type inference contradiction found, '" + @@ -296,72 +311,86 @@ bool EEVT::TypeSet::EnforceVector(TreePattern &TP) { } + /// EnforceSmallerThan - 'this' must be a smaller VT than Other. Update /// this an other based on this information. bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) { // Both operands must be integer or FP, but we don't care which. bool MadeChange = false; + if (isCompletelyUnknown()) + MadeChange = FillWithPossibleTypes(TP); + + if (Other.isCompletelyUnknown()) + MadeChange = Other.FillWithPossibleTypes(TP); + + // If one side is known to be integer or known to be FP but the other side has + // no information, get at least the type integrality info in there. + if (!hasFloatingPointTypes()) + MadeChange |= Other.EnforceInteger(TP); + else if (!hasIntegerTypes()) + MadeChange |= Other.EnforceFloatingPoint(TP); + if (!Other.hasFloatingPointTypes()) + MadeChange |= EnforceInteger(TP); + else if (!Other.hasIntegerTypes()) + MadeChange |= EnforceFloatingPoint(TP); + + assert(!isCompletelyUnknown() && !Other.isCompletelyUnknown() && + "Should have a type list now"); + + // If one contains vectors but the other doesn't pull vectors out. 
+ if (!hasVectorTypes()) + MadeChange |= Other.EnforceScalar(TP); + if (!hasVectorTypes()) + MadeChange |= EnforceScalar(TP); + // This code does not currently handle nodes which have multiple types, // where some types are integer, and some are fp. Assert that this is not // the case. assert(!(hasIntegerTypes() && hasFloatingPointTypes()) && !(Other.hasIntegerTypes() && Other.hasFloatingPointTypes()) && "SDTCisOpSmallerThanOp does not handle mixed int/fp types!"); - // If one side is known to be integer or known to be FP but the other side has - // no information, get at least the type integrality info in there. - if (hasIntegerTypes()) - MadeChange |= Other.EnforceInteger(TP); - else if (hasFloatingPointTypes()) - MadeChange |= Other.EnforceFloatingPoint(TP); - if (Other.hasIntegerTypes()) - MadeChange |= EnforceInteger(TP); - else if (Other.hasFloatingPointTypes()) - MadeChange |= EnforceFloatingPoint(TP); - assert(!isCompletelyUnknown() && !Other.isCompletelyUnknown() && - "Should have a type list now"); + // Okay, find the smallest type from the current set and remove it from the + // largest set. + MVT::SimpleValueType Smallest = TypeVec[0]; + for (unsigned i = 1, e = TypeVec.size(); i != e; ++i) + if (TypeVec[i] < Smallest) + Smallest = TypeVec[i]; - // If one contains vectors but the other doesn't pull vectors out. - if (!hasVectorTypes() && Other.hasVectorTypes()) - MadeChange |= Other.EnforceScalar(TP); - if (hasVectorTypes() && !Other.hasVectorTypes()) - MadeChange |= EnforceScalar(TP); + // If this is the only type in the large set, the constraint can never be + // satisfied. + if (Other.TypeVec.size() == 1 && Other.TypeVec[0] == Smallest) + TP.error("Type inference contradiction found, '" + + Other.getName() + "' has nothing larger than '" + getName() +"'!"); - // FIXME: This is a bone-headed way to do this. + SmallVector::iterator TVI = + std::find(Other.TypeVec.begin(), Other.TypeVec.end(), Smallest); + if (TVI != Other.TypeVec.end()) { + Other.TypeVec.erase(TVI); + MadeChange = true; + } - // Get the set of legal VTs and filter it based on the known integrality. - const CodeGenTarget &CGT = TP.getDAGPatterns().getTargetInfo(); - TypeSet LegalVTs = CGT.getLegalValueTypes(); - - // TODO: If one or the other side is known to be a specific VT, we could prune - // LegalVTs. - if (hasIntegerTypes()) - LegalVTs.EnforceInteger(TP); - else if (hasFloatingPointTypes()) - LegalVTs.EnforceFloatingPoint(TP); - else - return MadeChange; + // Okay, find the largest type in the Other set and remove it from the + // current set. + MVT::SimpleValueType Largest = Other.TypeVec[0]; + for (unsigned i = 1, e = Other.TypeVec.size(); i != e; ++i) + if (Other.TypeVec[i] > Largest) + Largest = Other.TypeVec[i]; - switch (LegalVTs.TypeVec.size()) { - case 0: assert(0 && "No legal VTs?"); - default: // Too many VT's to pick from. - // TODO: If the biggest type in LegalVTs is in this set, we could remove it. - // If one or the other side is known to be a specific VT, we could prune - // LegalVTs. - return MadeChange; - case 1: - // Only one VT of this flavor. Cannot ever satisfy the constraints. - return MergeInTypeInfo(MVT::Other, TP); // throw - case 2: - // If we have exactly two possible types, the little operand must be the - // small one, the big operand should be the big one. This is common with - // float/double for example. 
- assert(LegalVTs.TypeVec[0] < LegalVTs.TypeVec[1] && "Should be sorted!"); - MadeChange |= MergeInTypeInfo(LegalVTs.TypeVec[0], TP); - MadeChange |= Other.MergeInTypeInfo(LegalVTs.TypeVec[1], TP); - return MadeChange; - } + // If this is the only type in the small set, the constraint can never be + // satisfied. + if (TypeVec.size() == 1 && TypeVec[0] == Largest) + TP.error("Type inference contradiction found, '" + + getName() + "' has nothing smaller than '" + Other.getName()+"'!"); + + TVI = std::find(TypeVec.begin(), TypeVec.end(), Largest); + if (TVI != TypeVec.end()) { + TypeVec.erase(TVI); + MadeChange = true; + } + + return MadeChange; } /// EnforceVectorEltTypeIs - 'this' is now constrainted to be a vector type @@ -372,10 +401,8 @@ bool EEVT::TypeSet::EnforceVectorEltTypeIs(MVT::SimpleValueType VT, bool MadeChange = false; // If we know nothing, then get the full set. - if (TypeVec.empty()) { - *this = TP.getDAGPatterns().getTargetInfo().getLegalValueTypes(); - MadeChange = true; - } + if (TypeVec.empty()) + MadeChange = FillWithPossibleTypes(TP, isVector, "vector"); // Filter out all the non-vector types and types which don't have the right // element type. @@ -511,24 +538,27 @@ SDTypeConstraint::SDTypeConstraint(Record *R) { } /// getOperandNum - Return the node corresponding to operand #OpNo in tree -/// N, which has NumResults results. -TreePatternNode *SDTypeConstraint::getOperandNum(unsigned OpNo, - TreePatternNode *N, - unsigned NumResults) const { - assert(NumResults <= 1 && - "We only work with nodes with zero or one result so far!"); +/// N, and the result number in ResNo. +static TreePatternNode *getOperandNum(unsigned OpNo, TreePatternNode *N, + const SDNodeInfo &NodeInfo, + unsigned &ResNo) { + unsigned NumResults = NodeInfo.getNumResults(); + if (OpNo < NumResults) { + ResNo = OpNo; + return N; + } - if (OpNo >= (NumResults + N->getNumChildren())) { - errs() << "Invalid operand number " << OpNo << " "; + OpNo -= NumResults; + + if (OpNo >= N->getNumChildren()) { + errs() << "Invalid operand number in type constraint " + << (OpNo+NumResults) << " "; N->dump(); errs() << '\n'; exit(1); } - if (OpNo < NumResults) - return N; // FIXME: need value # - else - return N->getChild(OpNo-NumResults); + return N->getChild(OpNo); } /// ApplyTypeConstraint - Given a node in a pattern, apply this type @@ -538,10 +568,6 @@ TreePatternNode *SDTypeConstraint::getOperandNum(unsigned OpNo, bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N, const SDNodeInfo &NodeInfo, TreePattern &TP) const { - unsigned NumResults = NodeInfo.getNumResults(); - assert(NumResults <= 1 && - "We only work with nodes with zero or one result so far!"); - // Check that the number of operands is sane. Negative operands -> varargs. if (NodeInfo.getNumOperands() >= 0) { if (N->getNumChildren() != (unsigned)NodeInfo.getNumOperands()) @@ -549,30 +575,32 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N, itostr(NodeInfo.getNumOperands()) + " operands!"); } - TreePatternNode *NodeToApply = getOperandNum(OperandNo, N, NumResults); + unsigned ResNo = 0; // The result number being referenced. + TreePatternNode *NodeToApply = getOperandNum(OperandNo, N, NodeInfo, ResNo); switch (ConstraintType) { default: assert(0 && "Unknown constraint type!"); case SDTCisVT: // Operand must be a particular type. - return NodeToApply->UpdateNodeType(x.SDTCisVT_Info.VT, TP); + return NodeToApply->UpdateNodeType(ResNo, x.SDTCisVT_Info.VT, TP); case SDTCisPtrTy: // Operand must be same as target pointer type. 
- return NodeToApply->UpdateNodeType(MVT::iPTR, TP); + return NodeToApply->UpdateNodeType(ResNo, MVT::iPTR, TP); case SDTCisInt: // Require it to be one of the legal integer VTs. - return NodeToApply->getExtType().EnforceInteger(TP); + return NodeToApply->getExtType(ResNo).EnforceInteger(TP); case SDTCisFP: // Require it to be one of the legal fp VTs. - return NodeToApply->getExtType().EnforceFloatingPoint(TP); + return NodeToApply->getExtType(ResNo).EnforceFloatingPoint(TP); case SDTCisVec: // Require it to be one of the legal vector VTs. - return NodeToApply->getExtType().EnforceVector(TP); + return NodeToApply->getExtType(ResNo).EnforceVector(TP); case SDTCisSameAs: { + unsigned OResNo = 0; TreePatternNode *OtherNode = - getOperandNum(x.SDTCisSameAs_Info.OtherOperandNum, N, NumResults); - return NodeToApply->UpdateNodeType(OtherNode->getExtType(), TP) | - OtherNode->UpdateNodeType(NodeToApply->getExtType(), TP); + getOperandNum(x.SDTCisSameAs_Info.OtherOperandNum, N, NodeInfo, OResNo); + return NodeToApply->UpdateNodeType(OResNo, OtherNode->getExtType(ResNo),TP)| + OtherNode->UpdateNodeType(ResNo,NodeToApply->getExtType(OResNo),TP); } case SDTCisVTSmallerThanOp: { // The NodeToApply must be a leaf node that is a VT. OtherOperandNum must @@ -587,40 +615,47 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N, if (!isInteger(VT)) TP.error(N->getOperator()->getName() + " VT operand must be integer!"); + unsigned OResNo = 0; TreePatternNode *OtherNode = - getOperandNum(x.SDTCisVTSmallerThanOp_Info.OtherOperandNum, N,NumResults); + getOperandNum(x.SDTCisVTSmallerThanOp_Info.OtherOperandNum, N, NodeInfo, + OResNo); // It must be integer. - bool MadeChange = OtherNode->getExtType().EnforceInteger(TP); + bool MadeChange = OtherNode->getExtType(OResNo).EnforceInteger(TP); // This doesn't try to enforce any information on the OtherNode, it just // validates it when information is determined. - if (OtherNode->hasTypeSet() && OtherNode->getType() <= VT) - OtherNode->UpdateNodeType(MVT::Other, TP); // Throw an error. + if (OtherNode->hasTypeSet(OResNo) && OtherNode->getType(OResNo) <= VT) + OtherNode->UpdateNodeType(OResNo, MVT::Other, TP); // Throw an error. return MadeChange; } case SDTCisOpSmallerThanOp: { + unsigned BResNo = 0; TreePatternNode *BigOperand = - getOperandNum(x.SDTCisOpSmallerThanOp_Info.BigOperandNum, N, NumResults); - return NodeToApply->getExtType(). - EnforceSmallerThan(BigOperand->getExtType(), TP); + getOperandNum(x.SDTCisOpSmallerThanOp_Info.BigOperandNum, N, NodeInfo, + BResNo); + return NodeToApply->getExtType(ResNo). 
+ EnforceSmallerThan(BigOperand->getExtType(BResNo), TP); } case SDTCisEltOfVec: { + unsigned VResNo = 0; TreePatternNode *VecOperand = - getOperandNum(x.SDTCisEltOfVec_Info.OtherOperandNum, N, NumResults); - if (VecOperand->hasTypeSet()) { - if (!isVector(VecOperand->getType())) + getOperandNum(x.SDTCisEltOfVec_Info.OtherOperandNum, N, NodeInfo, + VResNo); + if (VecOperand->hasTypeSet(VResNo)) { + if (!isVector(VecOperand->getType(VResNo))) TP.error(N->getOperator()->getName() + " VT operand must be a vector!"); - EVT IVT = VecOperand->getType(); + EVT IVT = VecOperand->getType(VResNo); IVT = IVT.getVectorElementType(); - return NodeToApply->UpdateNodeType(IVT.getSimpleVT().SimpleTy, TP); + return NodeToApply->UpdateNodeType(ResNo, IVT.getSimpleVT().SimpleTy, TP); } - if (NodeToApply->hasTypeSet() && VecOperand->getExtType().hasVectorTypes()){ + if (NodeToApply->hasTypeSet(ResNo) && + VecOperand->getExtType(VResNo).hasVectorTypes()){ // Filter vector types out of VecOperand that don't have the right element // type. - return VecOperand->getExtType(). - EnforceVectorEltTypeIs(NodeToApply->getType(), TP); + return VecOperand->getExtType(VResNo). + EnforceVectorEltTypeIs(NodeToApply->getType(ResNo), TP); } return false; } @@ -662,6 +697,8 @@ SDNodeInfo::SDNodeInfo(Record *R) : Def(R) { Properties |= 1 << SDNPSideEffect; } else if (PropList[i]->getName() == "SDNPMemOperand") { Properties |= 1 << SDNPMemOperand; + } else if (PropList[i]->getName() == "SDNPVariadic") { + Properties |= 1 << SDNPVariadic; } else { errs() << "Unknown SD Node property '" << PropList[i]->getName() << "' on node '" << R->getName() << "'!\n"; @@ -678,8 +715,8 @@ SDNodeInfo::SDNodeInfo(Record *R) : Def(R) { /// getKnownType - If the type constraints on this node imply a fixed type /// (e.g. all stores return void, etc), then return it as an -/// MVT::SimpleValueType. Otherwise, return EEVT::isUnknown. -unsigned SDNodeInfo::getKnownType() const { +/// MVT::SimpleValueType. Otherwise, return EEVT::Other. +MVT::SimpleValueType SDNodeInfo::getKnownType() const { unsigned NumResults = getNumResults(); assert(NumResults <= 1 && "We only work with nodes with zero or one result so far!"); @@ -697,7 +734,7 @@ unsigned SDNodeInfo::getKnownType() const { return MVT::iPTR; } } - return EEVT::isUnknown; + return MVT::Other; } //===----------------------------------------------------------------------===// @@ -711,17 +748,73 @@ TreePatternNode::~TreePatternNode() { #endif } - - -void TreePatternNode::print(raw_ostream &OS) const { - if (isLeaf()) { - OS << *getLeafValue(); - } else { - OS << '(' << getOperator()->getName(); +static unsigned GetNumNodeResults(Record *Operator, CodeGenDAGPatterns &CDP) { + if (Operator->getName() == "set" || + Operator->getName() == "implicit" || + Operator->getName() == "parallel") + return 0; // All return nothing. + + if (Operator->isSubClassOf("Intrinsic")) { + unsigned NumRes = CDP.getIntrinsic(Operator).IS.RetVTs.size(); + if (NumRes == 1 && CDP.getIntrinsic(Operator).IS.RetVTs[0] == MVT::isVoid) + return 0; + return NumRes; } - if (!isTypeCompletelyUnknown()) - OS << ':' << getExtType().getName(); + if (Operator->isSubClassOf("SDNode")) + return CDP.getSDNodeInfo(Operator).getNumResults(); + + if (Operator->isSubClassOf("PatFrag")) { + // If we've already parsed this pattern fragment, get it. Otherwise, handle + // the forward reference case where one pattern fragment references another + // before it is processed. 
+ if (TreePattern *PFRec = CDP.getPatternFragmentIfRead(Operator)) + return PFRec->getOnlyTree()->getNumTypes(); + + // Get the result tree. + DagInit *Tree = Operator->getValueAsDag("Fragment"); + Record *Op = 0; + if (Tree && dynamic_cast(Tree->getOperator())) + Op = dynamic_cast(Tree->getOperator())->getDef(); + assert(Op && "Invalid Fragment"); + return GetNumNodeResults(Op, CDP); + } + + if (Operator->isSubClassOf("Instruction")) { + CodeGenInstruction &InstInfo = CDP.getTargetInfo().getInstruction(Operator); + + // FIXME: Handle implicit defs right. + if (InstInfo.NumDefs != 0) + return 1; // FIXME: Handle inst results right! + + if (!InstInfo.ImplicitDefs.empty()) { + // Add on one implicit def if it has a resolvable type. + Record *FirstImplicitDef = InstInfo.ImplicitDefs[0]; + assert(FirstImplicitDef->isSubClassOf("Register")); + const std::vector &RegVTs = + CDP.getTargetInfo().getRegisterVTs(FirstImplicitDef); + if (RegVTs.size() == 1) + return 1; + } + return 0; + } + + if (Operator->isSubClassOf("SDNodeXForm")) + return 1; // FIXME: Generalize SDNodeXForm + + Operator->dump(); + errs() << "Unhandled node in GetNumNodeResults\n"; + exit(1); +} + +void TreePatternNode::print(raw_ostream &OS) const { + if (isLeaf()) + OS << *getLeafValue(); + else + OS << '(' << getOperator()->getName(); + + for (unsigned i = 0, e = Types.size(); i != e; ++i) + OS << ':' << getExtType(i).getName(); if (!isLeaf()) { if (getNumChildren() != 0) { @@ -757,7 +850,7 @@ void TreePatternNode::dump() const { bool TreePatternNode::isIsomorphicTo(const TreePatternNode *N, const MultipleUseVarSet &DepVars) const { if (N == this) return true; - if (N->isLeaf() != isLeaf() || getExtType() != N->getExtType() || + if (N->isLeaf() != isLeaf() || getExtTypes() != N->getExtTypes() || getPredicateFns() != N->getPredicateFns() || getTransformFn() != N->getTransformFn()) return false; @@ -786,16 +879,16 @@ bool TreePatternNode::isIsomorphicTo(const TreePatternNode *N, TreePatternNode *TreePatternNode::clone() const { TreePatternNode *New; if (isLeaf()) { - New = new TreePatternNode(getLeafValue()); + New = new TreePatternNode(getLeafValue(), getNumTypes()); } else { std::vector CChildren; CChildren.reserve(Children.size()); for (unsigned i = 0, e = getNumChildren(); i != e; ++i) CChildren.push_back(getChild(i)->clone()); - New = new TreePatternNode(getOperator(), CChildren); + New = new TreePatternNode(getOperator(), CChildren, getNumTypes()); } New->setName(getName()); - New->setType(getExtType()); + New->Types = Types; New->setPredicateFns(getPredicateFns()); New->setTransformFn(getTransformFn()); return New; @@ -803,7 +896,8 @@ TreePatternNode *TreePatternNode::clone() const { /// RemoveAllTypes - Recursively strip all the types of this tree. void TreePatternNode::RemoveAllTypes() { - setType(EEVT::TypeSet()); // Reset to unknown type. + for (unsigned i = 0, e = Types.size(); i != e; ++i) + Types[i] = EEVT::TypeSet(); // Reset to unknown type. if (isLeaf()) return; for (unsigned i = 0, e = getNumChildren(); i != e; ++i) getChild(i)->RemoveAllTypes(); @@ -885,7 +979,8 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) { } FragTree->setName(getName()); - FragTree->UpdateNodeType(getExtType(), TP); + for (unsigned i = 0, e = Types.size(); i != e; ++i) + FragTree->UpdateNodeType(i, getExtType(i), TP); // Transfer in the old predicates. 
for (unsigned i = 0, e = getPredicateFns().size(); i != e; ++i) @@ -903,8 +998,10 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) { /// type which should be applied to it. This will infer the type of register /// references from the register file information, for example. /// -static EEVT::TypeSet getImplicitType(Record *R, bool NotRegisters, - TreePattern &TP) { +static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo, + bool NotRegisters, TreePattern &TP) { + assert(ResNo == 0 && "FIXME: Unhandled result number"); + // Check to see if this is a register or a register class. if (R->isSubClassOf("RegisterClass")) { if (NotRegisters) @@ -1015,17 +1112,23 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { if (isLeaf()) { if (DefInit *DI = dynamic_cast(getLeafValue())) { // If it's a regclass or something else known, include the type. - return UpdateNodeType(getImplicitType(DI->getDef(), NotRegisters, TP),TP); + bool MadeChange = false; + for (unsigned i = 0, e = Types.size(); i != e; ++i) + MadeChange |= UpdateNodeType(i, getImplicitType(DI->getDef(), i, + NotRegisters, TP), TP); + return MadeChange; } if (IntInit *II = dynamic_cast(getLeafValue())) { - // Int inits are always integers. :) - bool MadeChange = Type.EnforceInteger(TP); + assert(Types.size() == 1 && "Invalid IntInit"); - if (!hasTypeSet()) + // Int inits are always integers. :) + bool MadeChange = Types[0].EnforceInteger(TP); + + if (!Types[0].isConcrete()) return MadeChange; - MVT::SimpleValueType VT = getType(); + MVT::SimpleValueType VT = getType(0); if (VT == MVT::iPTR || VT == MVT::iPTRAny) return MadeChange; @@ -1046,7 +1149,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { return MadeChange; TP.error("Integer value '" + itostr(II->getValue())+ - "' is out of range for type '" + getEnumName(getType()) + "'!"); + "' is out of range for type '" + getEnumName(getType(0)) + "'!"); return MadeChange; } return false; @@ -1054,27 +1157,31 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { // special handling for set, which isn't really an SDNode. if (getOperator()->getName() == "set") { - assert (getNumChildren() >= 2 && "Missing RHS of a set?"); + assert(getNumTypes() == 0 && "Set doesn't produce a value"); + assert(getNumChildren() >= 2 && "Missing RHS of a set?"); unsigned NC = getNumChildren(); - bool MadeChange = false; + + TreePatternNode *SetVal = getChild(NC-1); + bool MadeChange = SetVal->ApplyTypeConstraints(TP, NotRegisters); + for (unsigned i = 0; i < NC-1; ++i) { - MadeChange = getChild(i)->ApplyTypeConstraints(TP, NotRegisters); - MadeChange |= getChild(NC-1)->ApplyTypeConstraints(TP, NotRegisters); + TreePatternNode *Child = getChild(i); + MadeChange |= Child->ApplyTypeConstraints(TP, NotRegisters); // Types of operands must match. 
- MadeChange |=getChild(i)->UpdateNodeType(getChild(NC-1)->getExtType(),TP); - MadeChange |=getChild(NC-1)->UpdateNodeType(getChild(i)->getExtType(),TP); - MadeChange |=UpdateNodeType(MVT::isVoid, TP); + MadeChange |= Child->UpdateNodeType(0, SetVal->getExtType(i), TP); + MadeChange |= SetVal->UpdateNodeType(i, Child->getExtType(0), TP); } return MadeChange; } if (getOperator()->getName() == "implicit" || getOperator()->getName() == "parallel") { + assert(getNumTypes() == 0 && "Node doesn't produce a value"); + bool MadeChange = false; for (unsigned i = 0; i < getNumChildren(); ++i) MadeChange = getChild(i)->ApplyTypeConstraints(TP, NotRegisters); - MadeChange |= UpdateNodeType(MVT::isVoid, TP); return MadeChange; } @@ -1083,13 +1190,16 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { MadeChange |= getChild(0)->ApplyTypeConstraints(TP, NotRegisters); MadeChange |= getChild(1)->ApplyTypeConstraints(TP, NotRegisters); + assert(getChild(0)->getNumTypes() == 1 && + getChild(1)->getNumTypes() == 1 && "Unhandled case"); + // child #1 of COPY_TO_REGCLASS should be a register class. We don't care // what type it gets, so if it didn't get a concrete type just give it the // first viable type from the reg class. - if (!getChild(1)->hasTypeSet() && - !getChild(1)->getExtType().isCompletelyUnknown()) { - MVT::SimpleValueType RCVT = getChild(1)->getExtType().getTypeList()[0]; - MadeChange |= getChild(1)->UpdateNodeType(RCVT, TP); + if (!getChild(1)->hasTypeSet(0) && + !getChild(1)->getExtType(0).isCompletelyUnknown()) { + MVT::SimpleValueType RCVT = getChild(1)->getExtType(0).getTypeList()[0]; + MadeChange |= getChild(1)->UpdateNodeType(0, RCVT, TP); } return MadeChange; } @@ -1100,22 +1210,26 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { // Apply the result type to the node. unsigned NumRetVTs = Int->IS.RetVTs.size(); unsigned NumParamVTs = Int->IS.ParamVTs.size(); - + if (NumRetVTs == 1 && Int->IS.RetVTs[0] == MVT::isVoid) + NumRetVTs = 0; + for (unsigned i = 0, e = NumRetVTs; i != e; ++i) - MadeChange |= UpdateNodeType(Int->IS.RetVTs[i], TP); + MadeChange |= UpdateNodeType(i, Int->IS.RetVTs[i], TP); - if (getNumChildren() != NumParamVTs + NumRetVTs) + if (getNumChildren() != NumParamVTs + 1) TP.error("Intrinsic '" + Int->Name + "' expects " + - utostr(NumParamVTs + NumRetVTs - 1) + " operands, not " + + utostr(NumParamVTs) + " operands, not " + utostr(getNumChildren() - 1) + " operands!"); // Apply type info to the intrinsic ID. - MadeChange |= getChild(0)->UpdateNodeType(MVT::iPTR, TP); + MadeChange |= getChild(0)->UpdateNodeType(0, MVT::iPTR, TP); - for (unsigned i = NumRetVTs, e = getNumChildren(); i != e; ++i) { - MVT::SimpleValueType OpVT = Int->IS.ParamVTs[i - NumRetVTs]; - MadeChange |= getChild(i)->UpdateNodeType(OpVT, TP); - MadeChange |= getChild(i)->ApplyTypeConstraints(TP, NotRegisters); + for (unsigned i = 0, e = getNumChildren()-1; i != e; ++i) { + MadeChange |= getChild(i+1)->ApplyTypeConstraints(TP, NotRegisters); + + MVT::SimpleValueType OpVT = Int->IS.ParamVTs[i]; + assert(getChild(i+1)->getNumTypes() == 1 && "Unhandled case"); + MadeChange |= getChild(i+1)->UpdateNodeType(0, OpVT, TP); } return MadeChange; } @@ -1126,51 +1240,60 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { bool MadeChange = NI.ApplyTypeConstraints(this, TP); for (unsigned i = 0, e = getNumChildren(); i != e; ++i) MadeChange |= getChild(i)->ApplyTypeConstraints(TP, NotRegisters); - // Branch, etc. 
do not produce results and top-level forms in instr pattern - // must have void types. - if (NI.getNumResults() == 0) - MadeChange |= UpdateNodeType(MVT::isVoid, TP); - - return MadeChange; + return MadeChange; } if (getOperator()->isSubClassOf("Instruction")) { const DAGInstruction &Inst = CDP.getInstruction(getOperator()); - bool MadeChange = false; - unsigned NumResults = Inst.getNumResults(); - - assert(NumResults <= 1 && - "Only supports zero or one result instrs!"); + unsigned ResNo = 0; + assert(Inst.getNumResults() <= 1 && + "FIXME: Only supports zero or one result instrs!"); CodeGenInstruction &InstInfo = - CDP.getTargetInfo().getInstruction(getOperator()->getName()); + CDP.getTargetInfo().getInstruction(getOperator()); + + EEVT::TypeSet ResultType; + // Apply the result type to the node - if (NumResults == 0 || InstInfo.NumDefs == 0) { - MadeChange = UpdateNodeType(MVT::isVoid, TP); - } else { + if (InstInfo.NumDefs != 0) { // # of elements in (outs) list Record *ResultNode = Inst.getResult(0); if (ResultNode->isSubClassOf("PointerLikeRegClass")) { - MadeChange = UpdateNodeType(MVT::iPTR, TP); + ResultType = EEVT::TypeSet(MVT::iPTR, TP); } else if (ResultNode->getName() == "unknown") { // Nothing to do. } else { assert(ResultNode->isSubClassOf("RegisterClass") && "Operands should be register classes!"); - const CodeGenRegisterClass &RC = CDP.getTargetInfo().getRegisterClass(ResultNode); - MadeChange = UpdateNodeType(RC.getValueTypes(), TP); + ResultType = RC.getValueTypes(); } + } else if (!InstInfo.ImplicitDefs.empty()) { + // If the instruction has implicit defs, the first one defines the result + // type. + Record *FirstImplicitDef = InstInfo.ImplicitDefs[0]; + assert(FirstImplicitDef->isSubClassOf("Register")); + const std::vector &RegVTs = + CDP.getTargetInfo().getRegisterVTs(FirstImplicitDef); + if (RegVTs.size() == 1) // FIXME: Generalize. + ResultType = EEVT::TypeSet(RegVTs); + } else { + // Otherwise, the instruction produces no value result. } + bool MadeChange = false; + + if (!ResultType.isCompletelyUnknown()) + MadeChange |= UpdateNodeType(ResNo, ResultType, TP); + // If this is an INSERT_SUBREG, constrain the source and destination VTs to // be the same. 
if (getOperator()->getName() == "INSERT_SUBREG") { - MadeChange |= UpdateNodeType(getChild(0)->getExtType(), TP); - MadeChange |= getChild(0)->UpdateNodeType(getExtType(), TP); + assert(getChild(0)->getNumTypes() == 1 && "FIXME: Unhandled"); + MadeChange |= UpdateNodeType(0, getChild(0)->getExtType(0), TP); + MadeChange |= getChild(0)->UpdateNodeType(0, getExtType(0), TP); } - unsigned ChildNo = 0; for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) { @@ -1191,15 +1314,17 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { MVT::SimpleValueType VT; TreePatternNode *Child = getChild(ChildNo++); + assert(Child->getNumTypes() == 1 && "Unknown case?"); + if (OperandNode->isSubClassOf("RegisterClass")) { const CodeGenRegisterClass &RC = CDP.getTargetInfo().getRegisterClass(OperandNode); - MadeChange |= Child->UpdateNodeType(RC.getValueTypes(), TP); + MadeChange |= Child->UpdateNodeType(0, RC.getValueTypes(), TP); } else if (OperandNode->isSubClassOf("Operand")) { VT = getValueType(OperandNode->getValueAsDef("Type")); - MadeChange |= Child->UpdateNodeType(VT, TP); + MadeChange |= Child->UpdateNodeType(0, VT, TP); } else if (OperandNode->isSubClassOf("PointerLikeRegClass")) { - MadeChange |= Child->UpdateNodeType(MVT::iPTR, TP); + MadeChange |= Child->UpdateNodeType(0, MVT::iPTR, TP); } else if (OperandNode->getName() == "unknown") { // Nothing to do. } else { @@ -1331,6 +1456,7 @@ void TreePattern::ComputeNamedNodes(TreePatternNode *N) { ComputeNamedNodes(N->getChild(i)); } + TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) { DefInit *OpDef = dynamic_cast(Dag->getOperator()); if (!OpDef) error("Pattern has unexpected operator type!"); @@ -1359,11 +1485,11 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) { Args.push_back(Dag->getArgName(0)); } - New = new TreePatternNode(DI); + New = new TreePatternNode(DI, 1); } else if (DagInit *DI = dynamic_cast(Arg)) { New = ParseTreePattern(DI); } else if (IntInit *II = dynamic_cast(Arg)) { - New = new TreePatternNode(II); + New = new TreePatternNode(II, 1); if (!Dag->getArgName(0).empty()) error("Constant int argument should not have a name!"); } else if (BitsInit *BI = dynamic_cast(Arg)) { @@ -1372,7 +1498,7 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) { if (II == 0 || !dynamic_cast(II)) error("Bits value must be constants!"); - New = new TreePatternNode(dynamic_cast(II)); + New = new TreePatternNode(dynamic_cast(II), 1); if (!Dag->getArgName(0).empty()) error("Constant int argument should not have a name!"); } else { @@ -1382,7 +1508,8 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) { } // Apply the type cast. - New->UpdateNodeType(getValueType(Operator), *this); + assert(New->getNumTypes() == 1 && "FIXME: Unhandled"); + New->UpdateNodeType(0, getValueType(Operator), *this); if (New->getNumChildren() == 0) New->setName(Dag->getArgName(0)); return New; @@ -1421,7 +1548,7 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) { std::vector >())); --i; // Revisit this node... 
} else { - TreePatternNode *Node = new TreePatternNode(DefI); + TreePatternNode *Node = new TreePatternNode(DefI, 1); Node->setName(Dag->getArgName(i)); Children.push_back(Node); @@ -1433,7 +1560,7 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) { } } } else if (IntInit *II = dynamic_cast(Arg)) { - TreePatternNode *Node = new TreePatternNode(II); + TreePatternNode *Node = new TreePatternNode(II, 1); if (!Dag->getArgName(i).empty()) error("Constant int argument should not have a name!"); Children.push_back(Node); @@ -1443,7 +1570,7 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) { if (II == 0 || !dynamic_cast(II)) error("Bits value must be constants!"); - TreePatternNode *Node = new TreePatternNode(dynamic_cast(II)); + TreePatternNode *Node = new TreePatternNode(dynamic_cast(II),1); if (!Dag->getArgName(i).empty()) error("Constant int argument should not have a name!"); Children.push_back(Node); @@ -1474,11 +1601,12 @@ TreePatternNode *TreePattern::ParseTreePattern(DagInit *Dag) { Operator = getDAGPatterns().get_intrinsic_wo_chain_sdnode(); } - TreePatternNode *IIDNode = new TreePatternNode(new IntInit(IID)); + TreePatternNode *IIDNode = new TreePatternNode(new IntInit(IID), 1); Children.insert(Children.begin(), IIDNode); } - TreePatternNode *Result = new TreePatternNode(Operator, Children); + unsigned NumResults = GetNumNodeResults(Operator, CDP); + TreePatternNode *Result = new TreePatternNode(Operator, Children, NumResults); Result->setName(Dag->getName()); return Result; } @@ -1525,7 +1653,11 @@ InferAllTypes(const StringMap > *InNamedTypes) { continue; } - MadeChange |=Nodes[i]->UpdateNodeType(InNodes[0]->getExtType(),*this); + assert(Nodes[i]->getNumTypes() == 1 && + InNodes[0]->getNumTypes() == 1 && + "FIXME: cannot name multiple result nodes yet"); + MadeChange |= Nodes[i]->UpdateNodeType(0, InNodes[0]->getExtType(0), + *this); } } @@ -1533,8 +1665,12 @@ InferAllTypes(const StringMap > *InNamedTypes) { // same type. if (I->second.size() > 1) { for (unsigned i = 0, e = Nodes.size()-1; i != e; ++i) { - MadeChange |=Nodes[i]->UpdateNodeType(Nodes[i+1]->getExtType(),*this); - MadeChange |=Nodes[i+1]->UpdateNodeType(Nodes[i]->getExtType(),*this); + TreePatternNode *N1 = Nodes[i], *N2 = Nodes[i+1]; + assert(N1->getNumTypes() == 1 && N2->getNumTypes() == 1 && + "FIXME: cannot name multiple result nodes yet"); + + MadeChange |= N1->UpdateNodeType(0, N2->getExtType(0), *this); + MadeChange |= N2->UpdateNodeType(0, N1->getExtType(0), *this); } } } @@ -1832,7 +1968,7 @@ static bool HandleUse(TreePattern *I, TreePatternNode *Pat, // Ensure that the inputs agree if we've already seen this input. if (Rec != SlotRec) I->error("All $" + Pat->getName() + " inputs must agree with each other"); - if (Slot->getExtType() != Pat->getExtType()) + if (Slot->getExtTypes() != Pat->getExtTypes()) I->error("All $" + Pat->getName() + " inputs must agree with each other"); return true; } @@ -1871,7 +2007,7 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat, // If this is not a set, verify that the children nodes are not void typed, // and recurse. 
for (unsigned i = 0, e = Pat->getNumChildren(); i != e; ++i) { - if (Pat->getChild(i)->getType() == MVT::isVoid) + if (Pat->getChild(i)->getNumTypes() == 0) I->error("Cannot have void nodes inside of patterns!"); FindPatternInputsAndOutputs(I, Pat->getChild(i), InstInputs, InstResults, InstImpInputs, InstImpResults); @@ -1933,10 +2069,12 @@ class InstAnalyzer { bool &mayStore; bool &mayLoad; bool &HasSideEffects; + bool &IsVariadic; public: InstAnalyzer(const CodeGenDAGPatterns &cdp, - bool &maystore, bool &mayload, bool &hse) - : CDP(cdp), mayStore(maystore), mayLoad(mayload), HasSideEffects(hse){ + bool &maystore, bool &mayload, bool &hse, bool &isv) + : CDP(cdp), mayStore(maystore), mayLoad(mayload), HasSideEffects(hse), + IsVariadic(isv) { } /// Analyze - Analyze the specified instruction, returning true if the @@ -1985,6 +2123,7 @@ private: if (OpInfo.hasProperty(SDNPMayStore)) mayStore = true; if (OpInfo.hasProperty(SDNPMayLoad)) mayLoad = true; if (OpInfo.hasProperty(SDNPSideEffect)) HasSideEffects = true; + if (OpInfo.hasProperty(SDNPVariadic)) IsVariadic = true; if (const CodeGenIntrinsic *IntInfo = N->getIntrinsicInfo(CDP)) { // If this is an intrinsic, analyze it. @@ -2004,12 +2143,13 @@ private: static void InferFromPattern(const CodeGenInstruction &Inst, bool &MayStore, bool &MayLoad, - bool &HasSideEffects, + bool &HasSideEffects, bool &IsVariadic, const CodeGenDAGPatterns &CDP) { - MayStore = MayLoad = HasSideEffects = false; + MayStore = MayLoad = HasSideEffects = IsVariadic = false; bool HadPattern = - InstAnalyzer(CDP, MayStore, MayLoad, HasSideEffects).Analyze(Inst.TheDef); + InstAnalyzer(CDP, MayStore, MayLoad, HasSideEffects, IsVariadic) + .Analyze(Inst.TheDef); // InstAnalyzer only correctly analyzes mayStore/mayLoad so far. if (Inst.mayStore) { // If the .td file explicitly sets mayStore, use it. @@ -2047,6 +2187,9 @@ static void InferFromPattern(const CodeGenInstruction &Inst, "which already inferred this.\n", Inst.TheDef->getName().c_str()); HasSideEffects = true; } + + if (Inst.isVariadic) + IsVariadic = true; // Can warn if we want. } /// ParseInstructions - Parse all of the instructions, inlining and resolving @@ -2068,7 +2211,7 @@ void CodeGenDAGPatterns::ParseInstructions() { std::vector Results; std::vector Operands; - CodeGenInstruction &InstInfo =Target.getInstruction(Instrs[i]->getName()); + CodeGenInstruction &InstInfo = Target.getInstruction(Instrs[i]); if (InstInfo.OperandList.size() != 0) { if (InstInfo.NumDefs == 0) { @@ -2119,7 +2262,7 @@ void CodeGenDAGPatterns::ParseInstructions() { // fill in the InstResults map. for (unsigned j = 0, e = I->getNumTrees(); j != e; ++j) { TreePatternNode *Pat = I->getTree(j); - if (!Pat->hasTypeSet() || Pat->getType() != MVT::isVoid) + if (Pat->getNumTypes() != 0) I->error("Top-level forms in instruction pattern should have" " void types"); @@ -2135,11 +2278,11 @@ void CodeGenDAGPatterns::ParseInstructions() { // Parse the operands list from the (ops) list, validating it. assert(I->getArgList().empty() && "Args list should still be empty here!"); - CodeGenInstruction &CGI = Target.getInstruction(Instrs[i]->getName()); + CodeGenInstruction &CGI = Target.getInstruction(Instrs[i]); // Check that all of the results occur first in the list. 
std::vector Results; - TreePatternNode *Res0Node = NULL; + TreePatternNode *Res0Node = 0; for (unsigned i = 0; i != NumResults; ++i) { if (i == CGI.OperandList.size()) I->error("'" + InstResults.begin()->first + @@ -2217,7 +2360,7 @@ void CodeGenDAGPatterns::ParseInstructions() { OpNode->setTransformFn(0); std::vector Children; Children.push_back(OpNode); - OpNode = new TreePatternNode(Xform, Children); + OpNode = new TreePatternNode(Xform, Children, OpNode->getNumTypes()); } ResultNodeOperands.push_back(OpNode); @@ -2228,10 +2371,11 @@ void CodeGenDAGPatterns::ParseInstructions() { " occurs in pattern but not in operands list!"); TreePatternNode *ResultPattern = - new TreePatternNode(I->getRecord(), ResultNodeOperands); + new TreePatternNode(I->getRecord(), ResultNodeOperands, + GetNumNodeResults(I->getRecord(), *this)); // Copy fully inferred output node type to instruction result pattern. - if (NumResults > 0) - ResultPattern->setType(Res0Node->getExtType()); + for (unsigned i = 0; i != NumResults; ++i) + ResultPattern->setType(i, Res0Node->getExtType(i)); // Create and insert the instruction. // FIXME: InstImpResults and InstImpInputs should not be part of @@ -2292,7 +2436,7 @@ static void FindNames(const TreePatternNode *P, // If this is the first instance of the name, remember the node. if (Rec.second++ == 0) Rec.first = P; - else if (Rec.first->getType() != P->getType()) + else if (Rec.first->getExtTypes() != P->getExtTypes()) PatternTop->error("repetition of value: $" + P->getName() + " where different uses have different types!"); } @@ -2347,17 +2491,19 @@ void CodeGenDAGPatterns::AddPatternToMatch(const TreePattern *Pattern, void CodeGenDAGPatterns::InferInstructionFlags() { - std::map &InstrDescs = - Target.getInstructions(); - for (std::map::iterator - II = InstrDescs.begin(), E = InstrDescs.end(); II != E; ++II) { - CodeGenInstruction &InstInfo = II->second; + const std::vector &Instructions = + Target.getInstructionsByEnumValue(); + for (unsigned i = 0, e = Instructions.size(); i != e; ++i) { + CodeGenInstruction &InstInfo = + const_cast(*Instructions[i]); // Determine properties of the instruction from its pattern. - bool MayStore, MayLoad, HasSideEffects; - InferFromPattern(InstInfo, MayStore, MayLoad, HasSideEffects, *this); + bool MayStore, MayLoad, HasSideEffects, IsVariadic; + InferFromPattern(InstInfo, MayStore, MayLoad, HasSideEffects, IsVariadic, + *this); InstInfo.mayStore = MayStore; InstInfo.mayLoad = MayLoad; InstInfo.hasSideEffects = HasSideEffects; + InstInfo.isVariadic = IsVariadic; } } @@ -2378,23 +2524,29 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) { // If this type is already concrete or completely unknown we can't do // anything. - if (N->getExtType().isCompletelyUnknown() || N->getExtType().isConcrete()) - return false; + for (unsigned i = 0, e = N->getNumTypes(); i != e; ++i) { + if (N->getExtType(i).isCompletelyUnknown() || N->getExtType(i).isConcrete()) + continue; - // Otherwise, force its type to the first possibility (an arbitrary choice). - return N->getExtType().MergeInTypeInfo(N->getExtType().getTypeList()[0], TP); + // Otherwise, force its type to the first possibility (an arbitrary choice). 
+ if (N->getExtType(i).MergeInTypeInfo(N->getExtType(i).getTypeList()[0], TP)) + return true; + } + + return false; } void CodeGenDAGPatterns::ParsePatterns() { std::vector Patterns = Records.getAllDerivedDefinitions("Pattern"); for (unsigned i = 0, e = Patterns.size(); i != e; ++i) { - DagInit *Tree = Patterns[i]->getValueAsDag("PatternToMatch"); + Record *CurPattern = Patterns[i]; + DagInit *Tree = CurPattern->getValueAsDag("PatternToMatch"); DefInit *OpDef = dynamic_cast(Tree->getOperator()); Record *Operator = OpDef->getDef(); TreePattern *Pattern; if (Operator->getName() != "parallel") - Pattern = new TreePattern(Patterns[i], Tree, true, *this); + Pattern = new TreePattern(CurPattern, Tree, true, *this); else { std::vector Values; RecTy *ListTy = 0; @@ -2419,17 +2571,17 @@ void CodeGenDAGPatterns::ParsePatterns() { } } ListInit *LI = new ListInit(Values, new ListRecTy(ListTy)); - Pattern = new TreePattern(Patterns[i], LI, true, *this); + Pattern = new TreePattern(CurPattern, LI, true, *this); } // Inline pattern fragments into it. Pattern->InlinePatternFragments(); - ListInit *LI = Patterns[i]->getValueAsListInit("ResultInstrs"); + ListInit *LI = CurPattern->getValueAsListInit("ResultInstrs"); if (LI->getSize() == 0) continue; // no pattern. // Parse the instruction. - TreePattern *Result = new TreePattern(Patterns[i], LI, false, *this); + TreePattern *Result = new TreePattern(CurPattern, LI, false, *this); // Inline pattern fragments into it. Result->InlinePatternFragments(); @@ -2451,14 +2603,20 @@ void CodeGenDAGPatterns::ParsePatterns() { InferredAllResultTypes = Result->InferAllTypes(&Pattern->getNamedNodesMap()); + IterateInference = false; + // Apply the type of the result to the source pattern. This helps us // resolve cases where the input type is known to be a pointer type (which // is considered resolved), but the result knows it needs to be 32- or // 64-bits. Infer the other way for good measure. 
- IterateInference = Pattern->getTree(0)-> - UpdateNodeType(Result->getTree(0)->getExtType(), *Result); - IterateInference |= Result->getTree(0)-> - UpdateNodeType(Pattern->getTree(0)->getExtType(), *Result); + for (unsigned i = 0, e = std::min(Result->getTree(0)->getNumTypes(), + Pattern->getTree(0)->getNumTypes()); + i != e; ++i) { + IterateInference = Pattern->getTree(0)-> + UpdateNodeType(i, Result->getTree(0)->getExtType(i), *Result); + IterateInference |= Result->getTree(0)-> + UpdateNodeType(i, Pattern->getTree(0)->getExtType(i), *Result); + } // If our iteration has converged and the input pattern's types are fully // resolved but the result pattern is not fully resolved, we may have a @@ -2473,7 +2631,6 @@ void CodeGenDAGPatterns::ParsePatterns() { !InferredAllResultTypes) IterateInference = ForceArbitraryInstResultType(Result->getTree(0), *Result); - } while (IterateInference); // Verify that we inferred enough types that we can do something with the @@ -2504,25 +2661,29 @@ void CodeGenDAGPatterns::ParsePatterns() { OpNode->setTransformFn(0); std::vector Children; Children.push_back(OpNode); - OpNode = new TreePatternNode(Xform, Children); + OpNode = new TreePatternNode(Xform, Children, OpNode->getNumTypes()); } ResultNodeOperands.push_back(OpNode); } DstPattern = Result->getOnlyTree(); if (!DstPattern->isLeaf()) DstPattern = new TreePatternNode(DstPattern->getOperator(), - ResultNodeOperands); - DstPattern->setType(Result->getOnlyTree()->getExtType()); + ResultNodeOperands, + DstPattern->getNumTypes()); + + for (unsigned i = 0, e = Result->getOnlyTree()->getNumTypes(); i != e; ++i) + DstPattern->setType(i, Result->getOnlyTree()->getExtType(i)); + TreePattern Temp(Result->getRecord(), DstPattern, false, *this); Temp.InferAllTypes(); AddPatternToMatch(Pattern, - PatternToMatch(Patterns[i]->getValueAsListInit("Predicates"), - Pattern->getTree(0), - Temp.getOnlyTree(), InstImpResults, - Patterns[i]->getValueAsInt("AddedComplexity"), - Patterns[i]->getID())); + PatternToMatch(CurPattern->getValueAsListInit("Predicates"), + Pattern->getTree(0), + Temp.getOnlyTree(), InstImpResults, + CurPattern->getValueAsInt("AddedComplexity"), + CurPattern->getID())); } } @@ -2556,13 +2717,15 @@ static void CombineChildVariants(TreePatternNode *Orig, std::vector NewChildren; for (unsigned i = 0, e = ChildVariants.size(); i != e; ++i) NewChildren.push_back(ChildVariants[i][Idxs[i]]); - TreePatternNode *R = new TreePatternNode(Orig->getOperator(), NewChildren); + TreePatternNode *R = new TreePatternNode(Orig->getOperator(), NewChildren, + Orig->getNumTypes()); // Copy over properties. R->setName(Orig->getName()); R->setPredicateFns(Orig->getPredicateFns()); R->setTransformFn(Orig->getTransformFn()); - R->setType(Orig->getExtType()); + for (unsigned i = 0, e = Orig->getNumTypes(); i != e; ++i) + R->setType(i, Orig->getExtType(i)); // If this pattern cannot match, do not include it as a variant. 
std::string ErrString; diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h index 44f82fe68469..f583f298f662 100644 --- a/utils/TableGen/CodeGenDAGPatterns.h +++ b/utils/TableGen/CodeGenDAGPatterns.h @@ -15,14 +15,14 @@ #ifndef CODEGEN_DAGPATTERNS_H #define CODEGEN_DAGPATTERNS_H -#include -#include -#include - #include "CodeGenTarget.h" #include "CodeGenIntrinsics.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include +#include +#include +#include namespace llvm { class Record; @@ -41,11 +41,6 @@ namespace llvm { /// arbitrary integer, floating-point, and vector types, so only an unknown /// value is needed. namespace EEVT { - enum DAGISelGenValueType { - // FIXME: Remove EEVT::isUnknown! - isUnknown = MVT::LAST_VALUETYPE - }; - /// TypeSet - This is either empty if it's completely unknown, or holds a set /// of types. It is used during type inference because register classes can /// have multiple possible types and we don't know which one they get until @@ -59,7 +54,7 @@ namespace EEVT { /// Vector has one concrete type: The type is completely known. /// class TypeSet { - SmallVector TypeVec; + SmallVector TypeVec; public: TypeSet() {} TypeSet(MVT::SimpleValueType VT, TreePattern &TP); @@ -88,6 +83,10 @@ namespace EEVT { return TypeVec; } + bool isVoid() const { + return TypeVec.size() == 1 && TypeVec[0] == MVT::isVoid; + } + /// hasIntegerTypes - Return true if this TypeSet contains any integer value /// types. bool hasIntegerTypes() const; @@ -134,6 +133,14 @@ namespace EEVT { bool operator!=(const TypeSet &RHS) const { return TypeVec != RHS.TypeVec; } bool operator==(const TypeSet &RHS) const { return TypeVec == RHS.TypeVec; } + + private: + /// FillWithPossibleTypes - Set to all legal types and return true, only + /// valid on completely unknown type sets. If Pred is non-null, only MVTs + /// that pass the predicate are added. + bool FillWithPossibleTypes(TreePattern &TP, + bool (*Pred)(MVT::SimpleValueType) = 0, + const char *PredicateName = 0); }; } @@ -175,11 +182,6 @@ struct SDTypeConstraint { /// exception. bool ApplyTypeConstraint(TreePatternNode *N, const SDNodeInfo &NodeInfo, TreePattern &TP) const; - - /// getOperandNum - Return the node corresponding to operand #OpNo in tree - /// N, which has NumResults results. - TreePatternNode *getOperandNum(unsigned OpNo, TreePatternNode *N, - unsigned NumResults) const; }; /// SDNodeInfo - One of these records is created for each SDNode instance in @@ -208,8 +210,8 @@ public: /// getKnownType - If the type constraints on this node imply a fixed type /// (e.g. all stores return void, etc), then return it as an - /// MVT::SimpleValueType. Otherwise, return EEVT::isUnknown. - unsigned getKnownType() const; + /// MVT::SimpleValueType. Otherwise, return MVT::Other. + MVT::SimpleValueType getKnownType() const; /// hasProperty - Return true if this node has the specified property. /// @@ -231,10 +233,10 @@ public: /// patterns), and as such should be ref counted. We currently just leak all /// TreePatternNode objects! class TreePatternNode { - /// The type of this node. Before and during type inference, this may be a - /// set of possible types. After (successful) type inference, this is a - /// single type. - EEVT::TypeSet Type; + /// The type of each node result. Before and during type inference, each + /// result may be a set of possible types. After (successful) type inference, + /// each is a single concrete type. 
+ SmallVector Types; /// Operator - The Record for the operator if this is an interior node (not /// a leaf). @@ -258,10 +260,14 @@ class TreePatternNode { std::vector Children; public: - TreePatternNode(Record *Op, const std::vector &Ch) - : Operator(Op), Val(0), TransformFn(0), Children(Ch) { } - TreePatternNode(Init *val) // leaf ctor + TreePatternNode(Record *Op, const std::vector &Ch, + unsigned NumResults) + : Operator(Op), Val(0), TransformFn(0), Children(Ch) { + Types.resize(NumResults); + } + TreePatternNode(Init *val, unsigned NumResults) // leaf ctor : Operator(0), Val(val), TransformFn(0) { + Types.resize(NumResults); } ~TreePatternNode(); @@ -271,14 +277,24 @@ public: bool isLeaf() const { return Val != 0; } // Type accessors. - MVT::SimpleValueType getType() const { return Type.getConcrete(); } - const EEVT::TypeSet &getExtType() const { return Type; } - EEVT::TypeSet &getExtType() { return Type; } - void setType(const EEVT::TypeSet &T) { Type = T; } + unsigned getNumTypes() const { return Types.size(); } + MVT::SimpleValueType getType(unsigned ResNo) const { + return Types[ResNo].getConcrete(); + } + const SmallVectorImpl &getExtTypes() const { return Types; } + const EEVT::TypeSet &getExtType(unsigned ResNo) const { return Types[ResNo]; } + EEVT::TypeSet &getExtType(unsigned ResNo) { return Types[ResNo]; } + void setType(unsigned ResNo, const EEVT::TypeSet &T) { Types[ResNo] = T; } - bool hasTypeSet() const { return Type.isConcrete(); } - bool isTypeCompletelyUnknown() const { return Type.isCompletelyUnknown(); } - bool isTypeDynamicallyResolved() const { return Type.isDynamicallyResolved();} + bool hasTypeSet(unsigned ResNo) const { + return Types[ResNo].isConcrete(); + } + bool isTypeCompletelyUnknown(unsigned ResNo) const { + return Types[ResNo].isCompletelyUnknown(); + } + bool isTypeDynamicallyResolved(unsigned ResNo) const { + return Types[ResNo].isDynamicallyResolved(); + } Init *getLeafValue() const { assert(isLeaf()); return Val; } Record *getOperator() const { assert(!isLeaf()); return Operator; } @@ -371,18 +387,22 @@ public: // Higher level manipulation routines. /// information. If N already contains a conflicting type, then throw an /// exception. This returns true if any information was updated. /// - bool UpdateNodeType(const EEVT::TypeSet &InTy, TreePattern &TP) { - return Type.MergeInTypeInfo(InTy, TP); + bool UpdateNodeType(unsigned ResNo, const EEVT::TypeSet &InTy, + TreePattern &TP) { + return Types[ResNo].MergeInTypeInfo(InTy, TP); } - bool UpdateNodeType(MVT::SimpleValueType InTy, TreePattern &TP) { - return Type.MergeInTypeInfo(EEVT::TypeSet(InTy, TP), TP); + bool UpdateNodeType(unsigned ResNo, MVT::SimpleValueType InTy, + TreePattern &TP) { + return Types[ResNo].MergeInTypeInfo(EEVT::TypeSet(InTy, TP), TP); } /// ContainsUnresolvedType - Return true if this tree contains any /// unresolved types. 
bool ContainsUnresolvedType() const { - if (!hasTypeSet()) return true; + for (unsigned i = 0, e = Types.size(); i != e; ++i) + if (!Types[i].isConcrete()) return true; + for (unsigned i = 0, e = getNumChildren(); i != e; ++i) if (getChild(i)->ContainsUnresolvedType()) return true; return false; @@ -672,6 +692,11 @@ public: assert(PatternFragments.count(R) && "Invalid pattern fragment request!"); return PatternFragments.find(R)->second; } + TreePattern *getPatternFragmentIfRead(Record *R) const { + if (!PatternFragments.count(R)) return 0; + return PatternFragments.find(R)->second; + } + typedef std::map::const_iterator pf_iterator; pf_iterator pf_begin() const { return PatternFragments.begin(); } diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp index f5b52ecefb35..eea55618721e 100644 --- a/utils/TableGen/CodeGenInstruction.cpp +++ b/utils/TableGen/CodeGenInstruction.cpp @@ -123,36 +123,43 @@ CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr) hasExtraDefRegAllocReq = R->getValueAsBit("hasExtraDefRegAllocReq"); hasOptionalDef = false; isVariadic = false; + ImplicitDefs = R->getValueAsListOfDefs("Defs"); + ImplicitUses = R->getValueAsListOfDefs("Uses"); if (neverHasSideEffects + hasSideEffects > 1) throw R->getName() + ": multiple conflicting side-effect flags set!"; - DagInit *DI; - try { - DI = R->getValueAsDag("OutOperandList"); - } catch (...) { - // Error getting operand list, just ignore it (sparcv9). - AsmString.clear(); - OperandList.clear(); - return; - } - NumDefs = DI->getNumArgs(); - - DagInit *IDI; - try { - IDI = R->getValueAsDag("InOperandList"); - } catch (...) { - // Error getting operand list, just ignore it (sparcv9). - AsmString.clear(); - OperandList.clear(); - return; - } - DI = (DagInit*)(new BinOpInit(BinOpInit::CONCAT, DI, IDI, new DagRecTy))->Fold(R, 0); + DagInit *OutDI = R->getValueAsDag("OutOperandList"); + if (DefInit *Init = dynamic_cast(OutDI->getOperator())) { + if (Init->getDef()->getName() != "outs") + throw R->getName() + ": invalid def name for output list: use 'outs'"; + } else + throw R->getName() + ": invalid output list: use 'outs'"; + + NumDefs = OutDI->getNumArgs(); + + DagInit *InDI = R->getValueAsDag("InOperandList"); + if (DefInit *Init = dynamic_cast(InDI->getOperator())) { + if (Init->getDef()->getName() != "ins") + throw R->getName() + ": invalid def name for input list: use 'ins'"; + } else + throw R->getName() + ": invalid input list: use 'ins'"; + unsigned MIOperandNo = 0; std::set OperandNames; - for (unsigned i = 0, e = DI->getNumArgs(); i != e; ++i) { - DefInit *Arg = dynamic_cast(DI->getArg(i)); + for (unsigned i = 0, e = InDI->getNumArgs()+OutDI->getNumArgs(); i != e; ++i){ + Init *ArgInit; + std::string ArgName; + if (i < NumDefs) { + ArgInit = OutDI->getArg(i); + ArgName = OutDI->getArgName(i); + } else { + ArgInit = InDI->getArg(i-NumDefs); + ArgName = InDI->getArgName(i-NumDefs); + } + + DefInit *Arg = dynamic_cast(ArgInit); if (!Arg) throw "Illegal operand for the '" + R->getName() + "' instruction!"; @@ -189,14 +196,14 @@ CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr) "' in '" + R->getName() + "' instruction!"; // Check that the operand has a name and that it's unique. 
- if (DI->getArgName(i).empty()) + if (ArgName.empty()) throw "In instruction '" + R->getName() + "', operand #" + utostr(i) + " has no name!"; - if (!OperandNames.insert(DI->getArgName(i)).second) + if (!OperandNames.insert(ArgName).second) throw "In instruction '" + R->getName() + "', operand #" + utostr(i) + " has the same name as a previous operand!"; - OperandList.push_back(OperandInfo(Rec, DI->getArgName(i), PrintMethod, + OperandList.push_back(OperandInfo(Rec, ArgName, PrintMethod, MIOperandNo, NumOps, MIOpInfo)); MIOperandNo += NumOps; } diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h index aae2cac86a3d..c369123dd694 100644 --- a/utils/TableGen/CodeGenInstruction.h +++ b/utils/TableGen/CodeGenInstruction.h @@ -105,7 +105,8 @@ namespace llvm { MINumOperands(MINO), MIOperandInfo(MIOI) {} }; - /// NumDefs - Number of def operands declared. + /// NumDefs - Number of def operands declared, this is the number of + /// elements in the instruction's (outs) list. /// unsigned NumDefs; @@ -113,6 +114,10 @@ namespace llvm { /// type (which is a record). std::vector OperandList; + /// ImplicitDefs/ImplicitUses - These are lists of registers that are + /// implicitly defined and used by the instruction. + std::vector ImplicitDefs, ImplicitUses; + // Various boolean values we track for the instruction. bool isReturn; bool isBranch; diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index ec6a31fd195f..79bc30d8c9ad 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -120,24 +120,21 @@ const std::string &CodeGenTarget::getName() const { } std::string CodeGenTarget::getInstNamespace() const { - std::string InstNS; - for (inst_iterator i = inst_begin(), e = inst_end(); i != e; ++i) { - InstNS = i->second.Namespace; - - // Make sure not to pick up "TargetInstrInfo" by accidentally getting + // Make sure not to pick up "TargetOpcode" by accidentally getting // the namespace off the PHI instruction or something. - if (InstNS != "TargetInstrInfo") - break; + if ((*i)->Namespace != "TargetOpcode") + return (*i)->Namespace; } - return InstNS; + return ""; } Record *CodeGenTarget::getInstructionSet() const { return TargetRec->getValueAsDef("InstructionSet"); } + /// getAsmParser - Return the AssemblyParser definition for this target. /// Record *CodeGenTarget::getAsmParser() const { @@ -277,98 +274,92 @@ void CodeGenTarget::ReadInstructions() const { for (unsigned i = 0, e = Insts.size(); i != e; ++i) { std::string AsmStr = Insts[i]->getValueAsString(InstFormatName); - Instructions.insert(std::make_pair(Insts[i]->getName(), - CodeGenInstruction(Insts[i], AsmStr))); + Instructions[Insts[i]] = new CodeGenInstruction(Insts[i], AsmStr); } } +static const CodeGenInstruction * +GetInstByName(const char *Name, + const DenseMap &Insts) { + const Record *Rec = Records.getDef(Name); + + DenseMap::const_iterator + I = Insts.find(Rec); + if (Rec == 0 || I == Insts.end()) + throw std::string("Could not find '") + Name + "' instruction!"; + return I->second; +} + +namespace { +/// SortInstByName - Sorting predicate to sort instructions by name. +/// +struct SortInstByName { + bool operator()(const CodeGenInstruction *Rec1, + const CodeGenInstruction *Rec2) const { + return Rec1->TheDef->getName() < Rec2->TheDef->getName(); + } +}; +} + /// getInstructionsByEnumValue - Return all of the instructions defined by the /// target, ordered by their enum value. 
-void CodeGenTarget:: -getInstructionsByEnumValue(std::vector - &NumberedInstructions) { - std::map::const_iterator I; - I = getInstructions().find("PHI"); - if (I == Instructions.end()) throw "Could not find 'PHI' instruction!"; - const CodeGenInstruction *PHI = &I->second; - - I = getInstructions().find("INLINEASM"); - if (I == Instructions.end()) throw "Could not find 'INLINEASM' instruction!"; - const CodeGenInstruction *INLINEASM = &I->second; - - I = getInstructions().find("DBG_LABEL"); - if (I == Instructions.end()) throw "Could not find 'DBG_LABEL' instruction!"; - const CodeGenInstruction *DBG_LABEL = &I->second; - - I = getInstructions().find("EH_LABEL"); - if (I == Instructions.end()) throw "Could not find 'EH_LABEL' instruction!"; - const CodeGenInstruction *EH_LABEL = &I->second; - - I = getInstructions().find("GC_LABEL"); - if (I == Instructions.end()) throw "Could not find 'GC_LABEL' instruction!"; - const CodeGenInstruction *GC_LABEL = &I->second; - - I = getInstructions().find("KILL"); - if (I == Instructions.end()) throw "Could not find 'KILL' instruction!"; - const CodeGenInstruction *KILL = &I->second; - - I = getInstructions().find("EXTRACT_SUBREG"); - if (I == Instructions.end()) - throw "Could not find 'EXTRACT_SUBREG' instruction!"; - const CodeGenInstruction *EXTRACT_SUBREG = &I->second; - - I = getInstructions().find("INSERT_SUBREG"); - if (I == Instructions.end()) - throw "Could not find 'INSERT_SUBREG' instruction!"; - const CodeGenInstruction *INSERT_SUBREG = &I->second; - - I = getInstructions().find("IMPLICIT_DEF"); - if (I == Instructions.end()) - throw "Could not find 'IMPLICIT_DEF' instruction!"; - const CodeGenInstruction *IMPLICIT_DEF = &I->second; - - I = getInstructions().find("SUBREG_TO_REG"); - if (I == Instructions.end()) - throw "Could not find 'SUBREG_TO_REG' instruction!"; - const CodeGenInstruction *SUBREG_TO_REG = &I->second; - - I = getInstructions().find("COPY_TO_REGCLASS"); - if (I == Instructions.end()) - throw "Could not find 'COPY_TO_REGCLASS' instruction!"; - const CodeGenInstruction *COPY_TO_REGCLASS = &I->second; - - I = getInstructions().find("DBG_VALUE"); - if (I == Instructions.end()) - throw "Could not find 'DBG_VALUE' instruction!"; - const CodeGenInstruction *DBG_VALUE = &I->second; +void CodeGenTarget::ComputeInstrsByEnum() const { + const DenseMap &Insts = getInstructions(); + const CodeGenInstruction *PHI = GetInstByName("PHI", Insts); + const CodeGenInstruction *INLINEASM = GetInstByName("INLINEASM", Insts); + const CodeGenInstruction *DBG_LABEL = GetInstByName("DBG_LABEL", Insts); + const CodeGenInstruction *EH_LABEL = GetInstByName("EH_LABEL", Insts); + const CodeGenInstruction *GC_LABEL = GetInstByName("GC_LABEL", Insts); + const CodeGenInstruction *KILL = GetInstByName("KILL", Insts); + const CodeGenInstruction *EXTRACT_SUBREG = + GetInstByName("EXTRACT_SUBREG", Insts); + const CodeGenInstruction *INSERT_SUBREG = + GetInstByName("INSERT_SUBREG", Insts); + const CodeGenInstruction *IMPLICIT_DEF = GetInstByName("IMPLICIT_DEF", Insts); + const CodeGenInstruction *SUBREG_TO_REG = + GetInstByName("SUBREG_TO_REG", Insts); + const CodeGenInstruction *COPY_TO_REGCLASS = + GetInstByName("COPY_TO_REGCLASS", Insts); + const CodeGenInstruction *DBG_VALUE = GetInstByName("DBG_VALUE", Insts); // Print out the rest of the instructions now. 
- NumberedInstructions.push_back(PHI); - NumberedInstructions.push_back(INLINEASM); - NumberedInstructions.push_back(DBG_LABEL); - NumberedInstructions.push_back(EH_LABEL); - NumberedInstructions.push_back(GC_LABEL); - NumberedInstructions.push_back(KILL); - NumberedInstructions.push_back(EXTRACT_SUBREG); - NumberedInstructions.push_back(INSERT_SUBREG); - NumberedInstructions.push_back(IMPLICIT_DEF); - NumberedInstructions.push_back(SUBREG_TO_REG); - NumberedInstructions.push_back(COPY_TO_REGCLASS); - NumberedInstructions.push_back(DBG_VALUE); - for (inst_iterator II = inst_begin(), E = inst_end(); II != E; ++II) - if (&II->second != PHI && - &II->second != INLINEASM && - &II->second != DBG_LABEL && - &II->second != EH_LABEL && - &II->second != GC_LABEL && - &II->second != KILL && - &II->second != EXTRACT_SUBREG && - &II->second != INSERT_SUBREG && - &II->second != IMPLICIT_DEF && - &II->second != SUBREG_TO_REG && - &II->second != COPY_TO_REGCLASS && - &II->second != DBG_VALUE) - NumberedInstructions.push_back(&II->second); + InstrsByEnum.push_back(PHI); + InstrsByEnum.push_back(INLINEASM); + InstrsByEnum.push_back(DBG_LABEL); + InstrsByEnum.push_back(EH_LABEL); + InstrsByEnum.push_back(GC_LABEL); + InstrsByEnum.push_back(KILL); + InstrsByEnum.push_back(EXTRACT_SUBREG); + InstrsByEnum.push_back(INSERT_SUBREG); + InstrsByEnum.push_back(IMPLICIT_DEF); + InstrsByEnum.push_back(SUBREG_TO_REG); + InstrsByEnum.push_back(COPY_TO_REGCLASS); + InstrsByEnum.push_back(DBG_VALUE); + + unsigned EndOfPredefines = InstrsByEnum.size(); + + for (DenseMap::const_iterator + I = Insts.begin(), E = Insts.end(); I != E; ++I) { + const CodeGenInstruction *CGI = I->second; + if (CGI != PHI && + CGI != INLINEASM && + CGI != DBG_LABEL && + CGI != EH_LABEL && + CGI != GC_LABEL && + CGI != KILL && + CGI != EXTRACT_SUBREG && + CGI != INSERT_SUBREG && + CGI != IMPLICIT_DEF && + CGI != SUBREG_TO_REG && + CGI != COPY_TO_REGCLASS && + CGI != DBG_VALUE) + InstrsByEnum.push_back(CGI); + } + + // All of the instructions are now in random order based on the map iteration. + // Sort them by name. 
+ std::sort(InstrsByEnum.begin()+EndOfPredefines, InstrsByEnum.end(), + SortInstByName()); } @@ -404,6 +395,8 @@ ComplexPattern::ComplexPattern(Record *R) { Properties |= 1 << SDNPSideEffect; } else if (PropList[i]->getName() == "SDNPMemOperand") { Properties |= 1 << SDNPMemOperand; + } else if (PropList[i]->getName() == "SDNPVariadic") { + Properties |= 1 << SDNPVariadic; } else { errs() << "Unsupported SD Node property '" << PropList[i]->getName() << "' on ComplexPattern '" << R->getName() << "'!\n"; diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h index 1df74af36220..29264189b745 100644 --- a/utils/TableGen/CodeGenTarget.h +++ b/utils/TableGen/CodeGenTarget.h @@ -17,11 +17,11 @@ #ifndef CODEGEN_TARGET_H #define CODEGEN_TARGET_H -#include "llvm/Support/raw_ostream.h" #include "CodeGenRegisters.h" #include "CodeGenInstruction.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseMap.h" #include -#include namespace llvm { @@ -43,7 +43,8 @@ enum SDNP { SDNPMayLoad, SDNPMayStore, SDNPSideEffect, - SDNPMemOperand + SDNPMemOperand, + SDNPVariadic }; /// getValueType - Return the MVT::SimpleValueType that the specified TableGen @@ -62,7 +63,7 @@ std::string getQualifiedName(const Record *R); class CodeGenTarget { Record *TargetRec; - mutable std::map Instructions; + mutable DenseMap Instructions; mutable std::vector Registers; mutable std::vector RegisterClasses; mutable std::vector LegalValueTypes; @@ -70,6 +71,8 @@ class CodeGenTarget { void ReadRegisterClasses() const; void ReadInstructions() const; void ReadLegalValueTypes() const; + + mutable std::vector InstrsByEnum; public: CodeGenTarget(); @@ -183,37 +186,40 @@ public: return false; } - /// getInstructions - Return all of the instructions defined for this target. - /// - const std::map &getInstructions() const { +private: + DenseMap &getInstructions() const { if (Instructions.empty()) ReadInstructions(); return Instructions; } - std::map &getInstructions() { +public: + + CodeGenInstruction &getInstruction(const Record *InstRec) const { if (Instructions.empty()) ReadInstructions(); - return Instructions; + DenseMap::iterator I = + Instructions.find(InstRec); + assert(I != Instructions.end() && "Not an instruction"); + return *I->second; } - CodeGenInstruction &getInstruction(const std::string &Name) const { - const std::map &Insts = getInstructions(); - assert(Insts.count(Name) && "Not an instruction!"); - return const_cast(Insts.find(Name)->second); - } - - typedef std::map::const_iterator inst_iterator; - inst_iterator inst_begin() const { return getInstructions().begin(); } - inst_iterator inst_end() const { return Instructions.end(); } - /// getInstructionsByEnumValue - Return all of the instructions defined by the /// target, ordered by their enum value. - void getInstructionsByEnumValue(std::vector - &NumberedInstructions); - + const std::vector & + getInstructionsByEnumValue() const { + if (InstrsByEnum.empty()) ComputeInstrsByEnum(); + return InstrsByEnum; + } + typedef std::vector::const_iterator inst_iterator; + inst_iterator inst_begin() const{return getInstructionsByEnumValue().begin();} + inst_iterator inst_end() const { return getInstructionsByEnumValue().end(); } + + /// isLittleEndianEncoding - are instruction bit patterns defined as [0..n]? 
/// bool isLittleEndianEncoding() const; + +private: + void ComputeInstrsByEnum() const; }; /// ComplexPattern - ComplexPattern info, corresponding to the ComplexPattern diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp index 73feac165120..044086add5b2 100644 --- a/utils/TableGen/DAGISelEmitter.cpp +++ b/utils/TableGen/DAGISelEmitter.cpp @@ -25,7 +25,6 @@ using namespace llvm; /// patterns before small ones. This is used to determine the size of a /// pattern. static unsigned getPatternSize(TreePatternNode *P, CodeGenDAGPatterns &CGP) { - assert(P->hasTypeSet() && "Not a valid pattern node to size!"); unsigned Size = 3; // The node itself. // If the root node is a ConstantSDNode, increases its size. // e.g. (set R32:$dst, 0). @@ -49,7 +48,8 @@ static unsigned getPatternSize(TreePatternNode *P, CodeGenDAGPatterns &CGP) { // Count children in the count if they are also nodes. for (unsigned i = 0, e = P->getNumChildren(); i != e; ++i) { TreePatternNode *Child = P->getChild(i); - if (!Child->isLeaf() && Child->getType() != MVT::Other) + if (!Child->isLeaf() && Child->getNumTypes() && + Child->getType(0) != MVT::Other) Size += getPatternSize(Child, CGP); else if (Child->isLeaf()) { if (dynamic_cast(Child->getLeafValue())) @@ -75,7 +75,7 @@ static unsigned getResultPatternCost(TreePatternNode *P, Record *Op = P->getOperator(); if (Op->isSubClassOf("Instruction")) { Cost++; - CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(Op->getName()); + CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(Op); if (II.usesCustomInserter) Cost += 10; } diff --git a/utils/TableGen/DAGISelMatcher.cpp b/utils/TableGen/DAGISelMatcher.cpp index 22d2fe848096..cd3fad131eca 100644 --- a/utils/TableGen/DAGISelMatcher.cpp +++ b/utils/TableGen/DAGISelMatcher.cpp @@ -357,14 +357,13 @@ bool CheckOpcodeMatcher::isContradictoryImpl(const Matcher *M) const { // ISD::STORE will never be true at the same time a check for Type i32 is. if (const CheckTypeMatcher *CT = dyn_cast(M)) { // FIXME: What result is this referring to? - unsigned NodeType; + MVT::SimpleValueType NodeType; if (getOpcode().getNumResults() == 0) NodeType = MVT::isVoid; else NodeType = getOpcode().getKnownType(); - if (NodeType != EEVT::isUnknown) - return TypesAreContradictory((MVT::SimpleValueType)NodeType, - CT->getType()); + if (NodeType != MVT::Other) + return TypesAreContradictory(NodeType, CT->getType()); } return false; diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp index 375df6b210a5..da6f6afd82dd 100644 --- a/utils/TableGen/DAGISelMatcherGen.cpp +++ b/utils/TableGen/DAGISelMatcherGen.cpp @@ -409,8 +409,10 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N, // need to do a type check. Emit the check, apply the tyep to NodeNoTypes and // reinfer any correlated types. 
bool DoTypeCheck = false; - if (NodeNoTypes->getExtType() != N->getExtType()) { - NodeNoTypes->setType(N->getExtType()); + if (NodeNoTypes->getNumTypes() != 0 && + NodeNoTypes->getExtType(0) != N->getExtType(0)) { + assert(NodeNoTypes->getNumTypes() == 1 && "FIXME: Handle multiple results"); + NodeNoTypes->setType(0, N->getExtType(0)); InferPossibleTypes(); DoTypeCheck = true; } @@ -442,8 +444,10 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N, for (unsigned i = 0, e = N->getPredicateFns().size(); i != e; ++i) AddMatcher(new CheckPredicateMatcher(N->getPredicateFns()[i])); - if (DoTypeCheck) - AddMatcher(new CheckTypeMatcher(N->getType())); + if (DoTypeCheck) { + assert(N->getNumTypes() == 1); + AddMatcher(new CheckTypeMatcher(N->getType(0))); + } } /// EmitMatcherCode - Generate the code that matches the predicate of this @@ -567,7 +571,7 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N, assert(N->isLeaf() && "Must be a leaf"); if (IntInit *II = dynamic_cast(N->getLeafValue())) { - AddMatcher(new EmitIntegerMatcher(II->getValue(), N->getType())); + AddMatcher(new EmitIntegerMatcher(II->getValue(), N->getType(0))); ResultOps.push_back(NextRecordedOperandNo++); return; } @@ -575,13 +579,13 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N, // If this is an explicit register reference, handle it. if (DefInit *DI = dynamic_cast(N->getLeafValue())) { if (DI->getDef()->isSubClassOf("Register")) { - AddMatcher(new EmitRegisterMatcher(DI->getDef(), N->getType())); + AddMatcher(new EmitRegisterMatcher(DI->getDef(), N->getType(0))); ResultOps.push_back(NextRecordedOperandNo++); return; } if (DI->getDef()->getName() == "zero_reg") { - AddMatcher(new EmitRegisterMatcher(0, N->getType())); + AddMatcher(new EmitRegisterMatcher(0, N->getType(0))); ResultOps.push_back(NextRecordedOperandNo++); return; } @@ -627,7 +631,7 @@ EmitResultInstructionAsOperand(const TreePatternNode *N, SmallVectorImpl &OutputOps) { Record *Op = N->getOperator(); const CodeGenTarget &CGT = CGP.getTargetInfo(); - CodeGenInstruction &II = CGT.getInstruction(Op->getName()); + CodeGenInstruction &II = CGT.getInstruction(Op); const DAGInstruction &Inst = CGP.getInstruction(Op); // If we can, get the pattern for the instruction we're generating. We derive @@ -698,7 +702,7 @@ EmitResultInstructionAsOperand(const TreePatternNode *N, // occur in patterns like (mul:i8 AL:i8, GR8:i8:$src). for (unsigned i = 0, e = PhysRegInputs.size(); i != e; ++i) AddMatcher(new EmitCopyToRegMatcher(PhysRegInputs[i].second, - PhysRegInputs[i].first)); + PhysRegInputs[i].first)); // Even if the node has no other flag inputs, the resultant node must be // flagged to the CopyFromReg nodes we just generated. TreeHasInFlag = true; @@ -708,12 +712,12 @@ EmitResultInstructionAsOperand(const TreePatternNode *N, // Determine the result types. SmallVector ResultVTs; - if (NumResults != 0 && N->getType() != MVT::isVoid) { + if (N->getNumTypes()) { // FIXME2: If the node has multiple results, we should add them. For now, // preserve existing behavior?! - ResultVTs.push_back(N->getType()); + assert(N->getNumTypes() == 1); + ResultVTs.push_back(N->getType(0)); } - // If this is the root instruction of a pattern that has physical registers in // its result pattern, add output VTs for them. 
For example, X86 has: @@ -721,16 +725,26 @@ EmitResultInstructionAsOperand(const TreePatternNode *N, // This also handles implicit results like: // (implicit EFLAGS) if (isRoot && Pattern.getDstRegs().size() != 0) { - for (unsigned i = 0; i != Pattern.getDstRegs().size(); ++i) - if (Pattern.getDstRegs()[i]->isSubClassOf("Register")) - ResultVTs.push_back(getRegisterValueType(Pattern.getDstRegs()[i], CGT)); + // If the root came from an implicit def in the instruction handling stuff, + // don't re-add it. + Record *HandledReg = 0; + if (NumResults == 0 && N->getNumTypes() != 0 && + !II.ImplicitDefs.empty()) + HandledReg = II.ImplicitDefs[0]; + + for (unsigned i = 0; i != Pattern.getDstRegs().size(); ++i) { + Record *Reg = Pattern.getDstRegs()[i]; + if (!Reg->isSubClassOf("Register") || Reg == HandledReg) continue; + ResultVTs.push_back(getRegisterValueType(Reg, CGT)); + } } - // FIXME2: Instead of using the isVariadic flag on the instruction, we should - // have an SDNP that indicates variadicism. The TargetInstrInfo isVariadic - // property should be inferred from this when an instruction has a pattern. + // If this is the root of the pattern and the pattern we're matching includes + // a node that is variadic, mark the generated node as variadic so that it + // gets the excess operands from the input DAG. int NumFixedArityOperands = -1; - if (isRoot && II.isVariadic) + if (isRoot && + (Pattern.getSrcPattern()->NodeHasProperty(SDNPVariadic, CGP))) NumFixedArityOperands = Pattern.getSrcPattern()->getNumChildren(); // If this is the root node and any of the nodes matched nodes in the input diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp index 61b9b1583b25..a195c0b8d6dc 100644 --- a/utils/TableGen/DisassemblerEmitter.cpp +++ b/utils/TableGen/DisassemblerEmitter.cpp @@ -108,8 +108,8 @@ void DisassemblerEmitter::run(raw_ostream &OS) { if (Target.getName() == "X86") { DisassemblerTables Tables; - std::vector numberedInstructions; - Target.getInstructionsByEnumValue(numberedInstructions); + const std::vector &numberedInstructions = + Target.getInstructionsByEnumValue(); for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i) RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i); diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp index 214941071ebb..f83ab4868259 100644 --- a/utils/TableGen/EDEmitter.cpp +++ b/utils/TableGen/EDEmitter.cpp @@ -549,8 +549,8 @@ static void X86ExtractSemantics(FlagsConstantEmitter &instFlags, /// @arg target - The CodeGenTarget to use as a source of instructions static void populateInstInfo(CompoundConstantEmitter &infoArray, CodeGenTarget &target) { - std::vector numberedInstructions; - target.getInstructionsByEnumValue(numberedInstructions); + const std::vector &numberedInstructions = + target.getInstructionsByEnumValue(); unsigned int index; unsigned int numInstructions = numberedInstructions.size(); diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp index b94ded645261..23f09a5bd316 100644 --- a/utils/TableGen/FastISelEmitter.cpp +++ b/utils/TableGen/FastISelEmitter.cpp @@ -70,13 +70,16 @@ struct OperandsSignature { for (unsigned i = 0, e = InstPatNode->getNumChildren(); i != e; ++i) { TreePatternNode *Op = InstPatNode->getChild(i); // For now, filter out any operand with a predicate. - if (!Op->getPredicateFns().empty()) - return false; // For now, filter out any operand with multiple values. 
- assert(Op->hasTypeSet() && "Type infererence not done?"); - // For now, all the operands must have the same type. - if (Op->getType() != VT) + if (!Op->getPredicateFns().empty() || + Op->getNumTypes() != 1) return false; + + assert(Op->hasTypeSet(0) && "Type infererence not done?"); + // For now, all the operands must have the same type. + if (Op->getType(0) != VT) + return false; + if (!Op->isLeaf()) { if (Op->getOperator()->getName() == "imm") { Operands.push_back("i"); @@ -254,7 +257,7 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) { Record *Op = Dst->getOperator(); if (!Op->isSubClassOf("Instruction")) continue; - CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(Op->getName()); + CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(Op); if (II.OperandList.empty()) continue; @@ -295,10 +298,14 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) { Record *InstPatOp = InstPatNode->getOperator(); std::string OpcodeName = getOpcodeName(InstPatOp, CGP); - MVT::SimpleValueType RetVT = InstPatNode->getType(); + assert(InstPatNode->getNumTypes() <= 1); + MVT::SimpleValueType RetVT = MVT::isVoid; + if (InstPatNode->getNumTypes()) RetVT = InstPatNode->getType(0); MVT::SimpleValueType VT = RetVT; - if (InstPatNode->getNumChildren()) - VT = InstPatNode->getChild(0)->getType(); + if (InstPatNode->getNumChildren()) { + assert(InstPatNode->getChild(0)->getNumTypes() == 1); + VT = InstPatNode->getChild(0)->getType(0); + } // For now, filter out instructions which just set a register to // an Operand or an immediate, like MOV32ri. diff --git a/utils/TableGen/InstrEnumEmitter.cpp b/utils/TableGen/InstrEnumEmitter.cpp index d1e7f3dd35d6..47a8474c35ec 100644 --- a/utils/TableGen/InstrEnumEmitter.cpp +++ b/utils/TableGen/InstrEnumEmitter.cpp @@ -26,22 +26,15 @@ void InstrEnumEmitter::run(raw_ostream &OS) { CodeGenTarget Target; // We must emit the PHI opcode first... - std::string Namespace; - for (CodeGenTarget::inst_iterator II = Target.inst_begin(), - E = Target.inst_end(); II != E; ++II) { - if (II->second.Namespace != "TargetOpcode") { - Namespace = II->second.Namespace; - break; - } - } + std::string Namespace = Target.getInstNamespace(); if (Namespace.empty()) { fprintf(stderr, "No instructions defined!\n"); exit(1); } - std::vector NumberedInstructions; - Target.getInstructionsByEnumValue(NumberedInstructions); + const std::vector &NumberedInstructions = + Target.getInstructionsByEnumValue(); OS << "namespace " << Namespace << " {\n"; OS << " enum {\n"; diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp index 898c92af8da9..8f7550b84810 100644 --- a/utils/TableGen/InstrInfoEmitter.cpp +++ b/utils/TableGen/InstrInfoEmitter.cpp @@ -39,16 +39,10 @@ static void PrintBarriers(std::vector &Barriers, // Instruction Itinerary Information. 
//===----------------------------------------------------------------------===// -struct RecordNameComparator { - bool operator()(const Record *Rec1, const Record *Rec2) const { - return Rec1->getName() < Rec2->getName(); - } -}; - void InstrInfoEmitter::GatherItinClasses() { std::vector DefList = Records.getAllDerivedDefinitions("InstrItinClass"); - std::sort(DefList.begin(), DefList.end(), RecordNameComparator()); + std::sort(DefList.begin(), DefList.end(), LessRecord()); for (unsigned i = 0, N = DefList.size(); i < N; i++) ItinClassMap[DefList[i]->getName()] = i; @@ -149,7 +143,7 @@ void InstrInfoEmitter::EmitOperandInfo(raw_ostream &OS, const CodeGenTarget &Target = CDP.getTargetInfo(); for (CodeGenTarget::inst_iterator II = Target.inst_begin(), E = Target.inst_end(); II != E; ++II) { - std::vector OperandInfo = GetOperandInfo(II->second); + std::vector OperandInfo = GetOperandInfo(**II); unsigned &N = OperandInfoIDs[OperandInfo]; if (N != 0) continue; @@ -214,7 +208,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) { // Emit all of the instruction's implicit uses and defs. for (CodeGenTarget::inst_iterator II = Target.inst_begin(), E = Target.inst_end(); II != E; ++II) { - Record *Inst = II->second.TheDef; + Record *Inst = (*II)->TheDef; std::vector Uses = Inst->getValueAsListOfDefs("Uses"); if (!Uses.empty()) { unsigned &IL = EmittedLists[Uses]; @@ -244,8 +238,8 @@ void InstrInfoEmitter::run(raw_ostream &OS) { // OS << "\nstatic const TargetInstrDesc " << TargetName << "Insts[] = {\n"; - std::vector NumberedInstructions; - Target.getInstructionsByEnumValue(NumberedInstructions); + const std::vector &NumberedInstructions = + Target.getInstructionsByEnumValue(); for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) emitRecord(*NumberedInstructions[i], i, InstrInfo, EmittedLists, diff --git a/utils/TableGen/Record.cpp b/utils/TableGen/Record.cpp index f9e2fe8a397a..b9facb4c2a6e 100644 --- a/utils/TableGen/Record.cpp +++ b/utils/TableGen/Record.cpp @@ -646,18 +646,8 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) { if (LHSs && RHSs) { DefInit *LOp = dynamic_cast(LHSs->getOperator()); DefInit *ROp = dynamic_cast(RHSs->getOperator()); - if (LOp->getDef() != ROp->getDef()) { - bool LIsOps = - LOp->getDef()->getName() == "outs" || - LOp->getDef()->getName() != "ins" || - LOp->getDef()->getName() != "defs"; - bool RIsOps = - ROp->getDef()->getName() == "outs" || - ROp->getDef()->getName() != "ins" || - ROp->getDef()->getName() != "defs"; - if (!LIsOps || !RIsOps) - throw "Concated Dag operators do not match!"; - } + if (LOp == 0 || ROp == 0 || LOp->getDef() != ROp->getDef()) + throw "Concated Dag operators do not match!"; std::vector Args; std::vector ArgNames; for (unsigned i = 0, e = LHSs->getNumArgs(); i != e; ++i) { diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm index 1fa3fdfc71cf..f67794e4fa96 100755 --- a/utils/buildit/build_llvm +++ b/utils/buildit/build_llvm @@ -106,11 +106,10 @@ if [ "x$RC_ProjectName" = "xllvmCore_EmbeddedHosted" ]; then # Try to use the platform llvm-gcc. Fall back to gcc if it's not available. 
for prog in gcc g++ ; do P=$DIR/bin/arm-apple-darwin$DARWIN_VERS-${prog} -# FIXME: Uncomment once llvm-gcc works for this -# T=`xcrun -find llvm-${prog}` -# if [ "x$T" = "x" ] ; then + T=`xcrun -find llvm-${prog}` + if [ "x$T" = "x" ] ; then T=`xcrun -sdk $SDKROOT -find ${prog}` -# fi + fi echo '#!/bin/sh' > $P || exit 1 echo 'exec '$T' -arch armv6 -isysroot '${SDKROOT}' "$@"' >> $P || exit 1 chmod a+x $P || exit 1 diff --git a/utils/lit/lit/ExampleTests/Clang/lit.cfg b/utils/lit/lit/ExampleTests/Clang/lit.cfg index 114ac60de640..1e1e807f3676 100644 --- a/utils/lit/lit/ExampleTests/Clang/lit.cfg +++ b/utils/lit/lit/ExampleTests/Clang/lit.cfg @@ -41,40 +41,7 @@ def inferClang(PATH): return clang -def inferClangCC(clang, PATH): - clangcc = os.getenv('CLANGCC') - - # If the user set clang in the environment, definitely use that and don't - # try to validate. - if clangcc: - return clangcc - - # Otherwise try adding -cc since we expect to be looking in a build - # directory. - if clang.endswith('.exe'): - clangccName = clang[:-4] + '-cc.exe' - else: - clangccName = clang + '-cc' - clangcc = lit.util.which(clangccName, PATH) - if not clangcc: - # Otherwise ask clang. - res = lit.util.capture([clang, '-print-prog-name=clang-cc']) - res = res.strip() - if res and os.path.exists(res): - clangcc = res - - if not clangcc: - lit.fatal("couldn't find 'clang-cc' program, try setting " - "CLANGCC in your environment") - - return clangcc - clang = inferClang(config.environment['PATH']) if not lit.quiet: lit.note('using clang: %r' % clang) config.substitutions.append( (' clang ', ' ' + clang + ' ') ) - -clang_cc = inferClangCC(clang, config.environment['PATH']) -if not lit.quiet: - lit.note('using clang-cc: %r' % clang_cc) -config.substitutions.append( (' clang-cc ', ' ' + clang_cc + ' ') ) diff --git a/utils/lit/lit/LitConfig.py b/utils/lit/lit/LitConfig.py index 0e0a4931dca7..9b62470902bd 100644 --- a/utils/lit/lit/LitConfig.py +++ b/utils/lit/lit/LitConfig.py @@ -15,7 +15,7 @@ class LitConfig: import Util as util def __init__(self, progname, path, quiet, - useValgrind, valgrindArgs, + useValgrind, valgrindLeakCheck, valgrindArgs, useTclAsSh, noExecute, debug, isWindows, params): @@ -25,7 +25,8 @@ class LitConfig: self.path = list(map(str, path)) self.quiet = bool(quiet) self.useValgrind = bool(useValgrind) - self.valgrindArgs = list(valgrindArgs) + self.valgrindLeakCheck = bool(valgrindLeakCheck) + self.valgrindUserArgs = list(valgrindArgs) self.useTclAsSh = bool(useTclAsSh) self.noExecute = noExecute self.debug = debug @@ -36,6 +37,22 @@ class LitConfig: self.numErrors = 0 self.numWarnings = 0 + self.valgrindArgs = [] + self.valgrindTriple = "" + if self.useValgrind: + self.valgrindTriple = "-vg" + self.valgrindArgs = ['valgrind', '-q', '--run-libc-freeres=no', + '--tool=memcheck', '--trace-children=yes', + '--error-exitcode=123'] + if self.valgrindLeakCheck: + self.valgrindTriple += "_leak" + self.valgrindArgs.append('--leak-check=full') + else: + # The default is 'summary'. 
+ self.valgrindArgs.append('--leak-check=no') + self.valgrindArgs.extend(self.valgrindUserArgs) + + def load_config(self, config, path): """load_config(config, path) - Load a config object from an alternate path.""" diff --git a/utils/lit/lit/LitFormats.py b/utils/lit/lit/LitFormats.py index 270f08707041..e86f103fe6b2 100644 --- a/utils/lit/lit/LitFormats.py +++ b/utils/lit/lit/LitFormats.py @@ -1,3 +1,2 @@ from TestFormats import GoogleTest, ShTest, TclTest from TestFormats import SyntaxCheckTest, OneCommandPerFileTest - diff --git a/utils/lit/lit/TestFormats.py b/utils/lit/lit/TestFormats.py index 6ab3f9c4626d..433e39a62780 100644 --- a/utils/lit/lit/TestFormats.py +++ b/utils/lit/lit/TestFormats.py @@ -73,12 +73,7 @@ class GoogleTest(object): cmd = [testPath, '--gtest_filter=' + testName] if litConfig.useValgrind: - valgrindArgs = ['valgrind', '-q', - '--tool=memcheck', '--trace-children=yes', - '--error-exitcode=123'] - valgrindArgs.extend(litConfig.valgrindArgs) - - cmd = valgrindArgs + cmd + cmd = litConfig.valgrindArgs + cmd out, err, exitCode = TestRunner.executeCommand( cmd, env=test.config.environment) diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py index 20fbc6c13a9f..29adff222983 100644 --- a/utils/lit/lit/TestRunner.py +++ b/utils/lit/lit/TestRunner.py @@ -252,6 +252,14 @@ def executeTclScriptInternal(test, litConfig, tmpBase, commands, cwd): except: return (Test.FAIL, "Tcl 'exec' parse error on: %r" % ln) + if litConfig.useValgrind: + for pipeline in cmds: + if pipeline.commands: + # Only valgrind the first command in each pipeline, to avoid + # valgrinding things like grep, not, and FileCheck. + cmd = pipeline.commands[0] + cmd.args = litConfig.valgrindArgs + cmd.args + cmd = cmds[0] for c in cmds[1:]: cmd = ShUtil.Seq(cmd, '&&', c) @@ -327,12 +335,7 @@ def executeScript(test, litConfig, tmpBase, commands, cwd): if litConfig.useValgrind: # FIXME: Running valgrind on sh is overkill. We probably could just # run on clang with no real loss. - valgrindArgs = ['valgrind', '-q', - '--tool=memcheck', '--trace-children=yes', - '--error-exitcode=123'] - valgrindArgs.extend(litConfig.valgrindArgs) - - command = valgrindArgs + command + command = litConfig.valgrindArgs + command return executeCommand(command, cwd=cwd, env=test.config.environment) diff --git a/utils/lit/lit/lit.py b/utils/lit/lit/lit.py index f1f19c4ddaea..e80075478a6d 100755 --- a/utils/lit/lit/lit.py +++ b/utils/lit/lit/lit.py @@ -362,6 +362,9 @@ def main(): group.add_option("", "--vg", dest="useValgrind", help="Run tests under valgrind", action="store_true", default=False) + group.add_option("", "--vg-leak", dest="valgrindLeakCheck", + help="Check for memory leaks under valgrind", + action="store_true", default=False) group.add_option("", "--vg-arg", dest="valgrindArgs", metavar="ARG", help="Specify an extra argument for valgrind", type=str, action="append", default=[]) @@ -411,7 +414,14 @@ def main(): gSiteConfigName = '%s.site.cfg' % opts.configPrefix if opts.numThreads is None: - opts.numThreads = Util.detectCPUs() +# Python <2.5 has a race condition causing lit to always fail with numThreads>1 +# http://bugs.python.org/issue1731717 +# I haven't seen this bug occur with 2.5.2 and later, so only enable multiple +# threads by default there. 
+ if sys.hexversion >= 0x2050200: + opts.numThreads = Util.detectCPUs() + else: + opts.numThreads = 1 inputs = args @@ -429,6 +439,7 @@ def main(): path = opts.path, quiet = opts.quiet, useValgrind = opts.useValgrind, + valgrindLeakCheck = opts.valgrindLeakCheck, valgrindArgs = opts.valgrindArgs, useTclAsSh = opts.useTclAsSh, noExecute = opts.noExecute,
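Note on the lit changes above: LitConfig now builds the valgrind argument list once, from --vg, --vg-leak and --vg-arg, and the runners in TestFormats.py and TestRunner.py simply prepend litConfig.valgrindArgs to each command instead of reassembling the flags themselves. A minimal standalone sketch of that behavior follows; the helper name build_valgrind_prefix and the example command are illustrative only and do not appear in the patch.

# Illustrative sketch, not part of the patch: mirrors how LitConfig derives a
# single valgrind prefix from --vg / --vg-leak / --vg-arg, which the test
# runners then prepend to every command they execute.
def build_valgrind_prefix(use_valgrind, leak_check=False, user_args=()):
    if not use_valgrind:
        return []
    args = ['valgrind', '-q', '--run-libc-freeres=no',
            '--tool=memcheck', '--trace-children=yes',
            '--error-exitcode=123']
    # --vg-leak enables full leak checking; otherwise leak checking is off.
    args.append('--leak-check=full' if leak_check else '--leak-check=no')
    args.extend(user_args)
    return args

if __name__ == '__main__':
    # Hypothetical test invocation, wrapped the same way GoogleTest runs are.
    cmd = ['./MyTests', '--gtest_filter=Foo.Bar']
    print(build_valgrind_prefix(True, leak_check=True, user_args=['-v']) + cmd)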