Import LLVM r73340.

Ed Schouten 2009-06-14 09:23:33 +00:00
parent 93338c1971
commit 600c6fa13d
115 changed files with 3938 additions and 1122 deletions


@ -26,6 +26,7 @@ set(LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/include)
set(LLVM_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(LLVM_TOOLS_BINARY_DIR ${LLVM_BINARY_DIR}/bin)
set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples)
set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" )
set(LLVM_ALL_TARGETS
Alpha
@ -186,11 +187,26 @@ if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
if( MSVC )
# List of valid CRTs for MSVC
set(MSVC_CRT
MD
MDd)
set(LLVM_USE_CRT "" CACHE STRING "Specify VC++ CRT to use for debug/release configurations.")
add_llvm_definitions( -D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS )
add_llvm_definitions( -D_SCL_SECURE_NO_WARNINGS -DCRT_NONSTDC_NO_WARNINGS )
add_llvm_definitions( -D_SCL_SECURE_NO_DEPRECATE )
add_llvm_definitions( -wd4146 -wd4503 -wd4996 -wd4800 -wd4244 -wd4624 )
add_llvm_definitions( -wd4355 -wd4715 -wd4180 -wd4345 -wd4224 )
if (NOT ${LLVM_USE_CRT} STREQUAL "")
list(FIND MSVC_CRT ${LLVM_USE_CRT} idx)
if (idx LESS 0)
message(FATAL_ERROR "Invalid value for LLVM_USE_CRT: ${LLVM_USE_CRT}. Valid options are one of: ${MSVC_CRT}")
endif (idx LESS 0)
add_llvm_definitions("/${LLVM_USE_CRT}")
message(STATUS "Using VC++ CRT: ${LLVM_USE_CRT}")
endif (NOT ${LLVM_USE_CRT} STREQUAL "")
endif( MSVC )
include_directories( ${LLVM_BINARY_DIR}/include ${LLVM_MAIN_INCLUDE_DIR})
@ -207,6 +223,8 @@ set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} ${LLVM_LIBS} )
set(LLVM_TABLEGEN "tblgen" CACHE
STRING "Native TableGen executable. Saves building one when cross-compiling.")
# Effective tblgen executable to be used:
set(LLVM_TABLEGEN_EXE ${LLVM_TABLEGEN})
add_subdirectory(utils/TableGen)


@ -3,7 +3,7 @@ project. If you have contributed a patch or made some other contribution to
LLVM, please submit a patch to this file to add yourself, and it will be
done!
The list is sorted by name and formatted to allow easy grepping and
The list is sorted by surname and formatted to allow easy grepping and
beautification by scripts. The fields are: name (N), email (E), web-address
(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
(S).
@ -148,10 +148,6 @@ N: Patrick Jenkins
E: patjenk@wam.umd.edu
D: Nightly Tester
N: Brad Jones
E: kungfoomaster@nondot.org
D: Support for packed types
N: Dale Johannesen
E: dalej@apple.com
D: ARM constant islands improvements
@ -160,6 +156,10 @@ D: Rewrite X87 back end
D: Use APFloat for floating point constants widely throughout compiler
D: Implement X87 long double
N: Brad Jones
E: kungfoomaster@nondot.org
D: Support for packed types
N: Eric Kidd
W: http://randomhacks.net/
D: llvm-config script
@ -231,6 +231,13 @@ N: Scott Michel
E: scottm@aero.org
D: Added STI Cell SPU backend.
N: Edward O'Callaghan
E: eocallaghan@auroraux.org
W: http://www.auroraux.org
D: Add Clang support with various other improvements to utils/NewNightlyTest.pl
D: Fix and maintain Solaris & AuroraUX support for llvm, various build warnings
D: and error clean ups.
N: Morten Ofstad
E: morten@hue.no
D: Visual C++ compatibility fixes
@ -266,6 +273,10 @@ N: Arnold Schwaighofer
E: arnold.schwaighofer@gmail.com
D: Tail call optimization for the x86 backend
N: Shantonu Sen
E: ssen@apple.com
D: Miscellaneous bug fixes
N: Anand Shukla
E: ashukla@cs.uiuc.edu
D: The `paths' pass
@ -290,8 +301,4 @@ D: Thread Local Storage implementation
N: Bill Wendling
E: isanbard@gmail.com
D: Machine LICM
D: Darwin exception handling
D: MMX & SSSE3 instructions
D: SPEC2006 support
D: Bunches of stuff


@ -10,8 +10,8 @@ macro(add_llvm_library name)
add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} )
endif( LLVM_COMMON_DEPENDS )
install(TARGETS ${name}
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX})
endmacro(add_llvm_library name)


@ -38,5 +38,5 @@ macro(add_partially_linked_object lib)
set( llvm_lib_targets ${llvm_lib_targets} ${tnplo} PARENT_SCOPE )
endif( )
install(FILES ${pll}
DESTINATION lib)
DESTINATION lib${LLVM_LIBDIR_SUFFIX})
endmacro(add_partially_linked_object lib)


@ -1,7 +1,7 @@
if( ${LLVM_TABLEGEN} STREQUAL "tblgen" )
set(CX_NATIVE_TG_DIR "${CMAKE_BINARY_DIR}/native")
set(LLVM_TABLEGEN "${CX_NATIVE_TG_DIR}/bin/tblgen")
set(LLVM_TABLEGEN_EXE "${CX_NATIVE_TG_DIR}/bin/tblgen")
add_custom_command(OUTPUT ${CX_NATIVE_TG_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory ${CX_NATIVE_TG_DIR}
@ -13,12 +13,12 @@ if( ${LLVM_TABLEGEN} STREQUAL "tblgen" )
DEPENDS ${CX_NATIVE_TG_DIR}
COMMENT "Configuring native TableGen...")
add_custom_command(OUTPUT ${LLVM_TABLEGEN}
add_custom_command(OUTPUT ${LLVM_TABLEGEN_EXE}
COMMAND ${CMAKE_BUILD_TOOL}
DEPENDS ${CX_NATIVE_TG_DIR}/CMakeCache.txt
WORKING_DIRECTORY ${CX_NATIVE_TG_DIR}/utils/TableGen
COMMENT "Building native TableGen...")
add_custom_target(NativeTableGen DEPENDS ${LLVM_TABLEGEN})
add_custom_target(NativeTableGen DEPENDS ${LLVM_TABLEGEN_EXE})
add_dependencies(tblgen NativeTableGen)


@ -6,11 +6,11 @@ macro(tablegen ofn)
file(GLOB all_tds "*.td")
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
COMMAND ${LLVM_TABLEGEN} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${LLVM_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
-I ${LLVM_MAIN_SRC_DIR}/lib/Target -I ${LLVM_MAIN_INCLUDE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/${LLVM_TARGET_DEFINITIONS}
-o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
DEPENDS ${LLVM_TABLEGEN} ${all_tds}
DEPENDS tblgen ${all_tds}
COMMENT "Building ${ofn}.tmp..."
)
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}


@ -217,6 +217,11 @@
<dd>Path where LLVM will be installed if "make install" is invoked
or the "INSTALL" target is built.</dd>
<dt><b>LLVM_LIBDIR_SUFFIX</b>:STRING</dt>
<dd>Extra suffix to append to the directory where libraries are to
be installed. On a 64-bit architecture, one could use
-DLLVM_LIBDIR_SUFFIX=64 to install libraries to /usr/lib64.</dd>
<dt><b>CMAKE_C_FLAGS</b>:STRING</dt>
<dd>Extra flags to use when compiling C source files.</dd>
@ -296,7 +301,13 @@
<div class="doc_text">
<p>TODO</p>
<p>See <a href="http://www.vtk.org/Wiki/CMake_Cross_Compiling">this
wiki page</a> for generic instructions on how to cross-compile
with CMake. It goes into detailed explanations and may seem
daunting, but it is not. On the wiki page there are several
examples including toolchain files. Go directly to
<a href="http://www.vtk.org/Wiki/CMake_Cross_Compiling#Information_how_to_set_up_various_cross_compiling_toolchains">this
section</a> for a quick solution.</p>
</div>


@ -1091,19 +1091,27 @@ stack before the local variables that's checked upon return from the function to
see if it has been overwritten. A heuristic is used to determine if a function
needs stack protectors or not.
<p>If a function that has an <tt>ssp</tt> attribute is inlined into a function
<br><br>If a function that has an <tt>ssp</tt> attribute is inlined into a function
that doesn't have an <tt>ssp</tt> attribute, then the resulting function will
have an <tt>ssp</tt> attribute.</p></dd>
have an <tt>ssp</tt> attribute.</dd>
<dt><tt>sspreq</tt></dt>
<dd>This attribute indicates that the function should <em>always</em> emit a
stack smashing protector. This overrides the <tt><a href="#ssp">ssp</a></tt>
function attribute.
<p>If a function that has an <tt>sspreq</tt> attribute is inlined into a
If a function that has an <tt>sspreq</tt> attribute is inlined into a
function that doesn't have an <tt>sspreq</tt> attribute or which has
an <tt>ssp</tt> attribute, then the resulting function will have
an <tt>sspreq</tt> attribute.</p></dd>
an <tt>sspreq</tt> attribute.</dd>
<dt><tt>noredzone</tt></dt>
<dd>This attribute indicates that the code generator should not enforce red zone
mandated by target specific ABI.</dd>
<dt><tt>noimplicitfloat</tt></dt>
<dd>This attribute disables implicit floating-point instructions.</dd>
</dl>
</div>
@ -1177,6 +1185,9 @@ aspect of the data layout. The specifications accepted are as follows: </p>
<dt><tt>a<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
<dd>This specifies the alignment for an aggregate type of a given bit
<i>size</i>.</dd>
<dt><tt>s<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
<dd>This specifies the alignment for a stack object of a given bit
<i>size</i>.</dd>
</dl>
<p>When constructing the data layout for a given target, LLVM starts with a
default set of specifications which are then (possibly) overridden by the
@ -1196,6 +1207,7 @@ are given in this list:</p>
<li><tt>v64:64:64</tt> - 64-bit vector is 64-bit aligned</li>
<li><tt>v128:128:128</tt> - 128-bit vector is 128-bit aligned</li>
<li><tt>a0:0:1</tt> - aggregates are 8-bit aligned</li>
<li><tt>s0:64:64</tt> - stack objects are 64-bit aligned</li>
</ul>
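As a hedged illustration of the new <tt>s</tt> specification, a module-level layout string could be set from C++ as follows; the other entries in the string are assumptions chosen for the example, not any particular target's defaults.

#include "llvm/Module.h"
using namespace llvm;

// A minimal sketch: override stack-object alignment via the new "s" entry.
// "s0:64:64" follows the pattern of the defaults listed above: 64-bit ABI
// and preferred alignment for stack objects (size 0 is the default entry).
void setExampleLayout(Module &M) {
  M.setDataLayout("e-p:32:32:32-i32:32:32-s0:64:64");
}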
<p>When LLVM is determining the alignment for a given type, it uses the
following rules:</p>
@ -7209,7 +7221,7 @@ declare void @llvm.stackprotector( i8* &lt;guard&gt;, i8** &lt;slot&gt; )
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
Last modified: $Date: 2009-06-05 00:49:04 +0200 (Fri, 05 Jun 2009) $
Last modified: $Date: 2009-06-12 21:45:19 +0200 (Fri, 12 Jun 2009) $
</address>
</body>


@ -371,8 +371,11 @@ supported include:</p>
<dd>string value</dd>
<dt><tt>[{ ... }]</tt></dt>
<dd>code fragment</dd>
<dt><tt>[ X, Y, Z ]</tt></dt>
<dd>list value.</dd>
<dt><tt>[ X, Y, Z ]<type></tt></dt>
<dd>list value. <type> is the type of the list
element and is usually optional. In rare cases,
TableGen is unable to deduce the element type in
which case the user must specify it explicitly.</dd>
<dt><tt>{ a, b, c }</tt></dt>
<dd>initializer for a "bits&lt;3&gt;" value</dd>
<dt><tt>value</tt></dt>
@ -778,7 +781,7 @@ This should highlight the APIs in <tt>TableGen/Record.h</tt>.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
Last modified: $Date: 2009-05-19 00:14:45 +0200 (Tue, 19 May 2009) $
Last modified: $Date: 2009-06-09 20:31:17 +0200 (Tue, 09 Jun 2009) $
</address>
</body>


@ -393,6 +393,7 @@ namespace llvm {
SCEVHandle getTruncateExpr(const SCEVHandle &Op, const Type *Ty);
SCEVHandle getZeroExtendExpr(const SCEVHandle &Op, const Type *Ty);
SCEVHandle getSignExtendExpr(const SCEVHandle &Op, const Type *Ty);
SCEVHandle getAnyExtendExpr(const SCEVHandle &Op, const Type *Ty);
SCEVHandle getAddExpr(std::vector<SCEVHandle> &Ops);
SCEVHandle getAddExpr(const SCEVHandle &LHS, const SCEVHandle &RHS) {
std::vector<SCEVHandle> Ops;
@ -465,6 +466,12 @@ namespace llvm {
/// it is sign extended. The conversion must not be narrowing.
SCEVHandle getNoopOrSignExtend(const SCEVHandle &V, const Type *Ty);
/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
/// the input value to the specified type. If the type must be extended,
/// it is extended with unspecified bits. The conversion must not be
/// narrowing.
SCEVHandle getNoopOrAnyExtend(const SCEVHandle &V, const Type *Ty);
/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. The conversion must not be
/// widening.


@ -61,7 +61,8 @@ const Attributes NoImplicitFloat = 1<<23; /// disable implicit floating point
/// @brief Attributes that only apply to function parameters.
const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
/// @brief Attributes that only apply to function.
/// @brief Attributes that may be applied to the function itself. These cannot
/// be used on return values or function parameters.
const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly |
NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq |
NoRedZone | NoImplicitFloat;
@ -186,7 +187,7 @@ class AttrListPtr {
/// getFnAttributes - The function attributes are returned.
Attributes getFnAttributes() const {
return getAttributes(~0);
return getAttributes(~0U);
}
/// paramHasAttr - Return true if the specified parameter index has the


@ -0,0 +1,325 @@
//===-- llvm/CodeGen/BinaryObject.h - Binary Object. -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a Binary Object, aka a "blob", for holding data from
// code generators, ready to be written out by the object module code writers.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_BINARYOBJECT_H
#define LLVM_CODEGEN_BINARYOBJECT_H
#include <string>
#include <vector>
namespace llvm {
class MachineRelocation;
typedef std::vector<uint8_t> BinaryData;
class BinaryObject {
protected:
std::string Name;
bool IsLittleEndian;
bool Is64Bit;
BinaryData Data;
std::vector<MachineRelocation> Relocations;
public:
/// Constructors and destructor
BinaryObject() {}
BinaryObject(bool isLittleEndian, bool is64Bit)
: IsLittleEndian(isLittleEndian), Is64Bit(is64Bit) {}
BinaryObject(const std::string &name, bool isLittleEndian, bool is64Bit)
: Name(name), IsLittleEndian(isLittleEndian), Is64Bit(is64Bit) {}
~BinaryObject() {}
/// getName - get name of BinaryObject
inline std::string getName() const { return Name; }
/// get size of binary data
size_t size() const {
return Data.size();
}
/// get binary data
BinaryData& getData() {
return Data;
}
/// get machine relocations
const std::vector<MachineRelocation>& getRelocations() const {
return Relocations;
}
/// emitByte - This callback is invoked when a byte needs to be
/// written to the data stream.
inline void emitByte(uint8_t B) {
Data.push_back(B);
}
/// emitWord16 - This callback is invoked when a 16-bit word needs to be
/// written to the data stream in correct endian format and correct size.
inline void emitWord16(uint16_t W) {
if (IsLittleEndian)
emitWord16LE(W);
else
emitWord16BE(W);
}
/// emitWord16LE - This callback is invoked when a 16-bit word needs to be
/// written to the data stream in little-endian format.
inline void emitWord16LE(uint16_t W) {
Data.push_back((W >> 0) & 255);
Data.push_back((W >> 8) & 255);
}
/// emitWord16BE - This callback is invoked when a 16-bit word needs to be
/// written to the data stream in big-endian format.
inline void emitWord16BE(uint16_t W) {
Data.push_back((W >> 8) & 255);
Data.push_back((W >> 0) & 255);
}
/// emitWord - This callback is invoked when a word needs to be
/// written to the data stream in correct endian format and correct size.
inline void emitWord(uint64_t W) {
if (!Is64Bit)
emitWord32(W);
else
emitWord64(W);
}
/// emitWord32 - This callback is invoked when a 32-bit word needs to be
/// written to the data stream in correct endian format.
inline void emitWord32(uint32_t W) {
if (IsLittleEndian)
emitWordLE(W);
else
emitWordBE(W);
}
/// emitWord64 - This callback is invoked when a 64-bit word needs to be
/// written to the data stream in correct endian format.
inline void emitWord64(uint64_t W) {
if (IsLittleEndian)
emitDWordLE(W);
else
emitDWordBE(W);
}
/// emitWordLE - This callback is invoked when a 32-bit word needs to be
/// written to the data stream in little-endian format.
inline void emitWordLE(uint32_t W) {
Data.push_back((W >> 0) & 255);
Data.push_back((W >> 8) & 255);
Data.push_back((W >> 16) & 255);
Data.push_back((W >> 24) & 255);
}
/// emitWordBE - This callback is invoked when a 32-bit word needs to be
/// written to the data stream in big-endian format.
///
inline void emitWordBE(uint32_t W) {
Data.push_back((W >> 24) & 255);
Data.push_back((W >> 16) & 255);
Data.push_back((W >> 8) & 255);
Data.push_back((W >> 0) & 255);
}
/// emitDWordLE - This callback is invoked when a 64-bit word needs to be
/// written to the data stream in little-endian format.
inline void emitDWordLE(uint64_t W) {
Data.push_back(unsigned(W >> 0) & 255);
Data.push_back(unsigned(W >> 8) & 255);
Data.push_back(unsigned(W >> 16) & 255);
Data.push_back(unsigned(W >> 24) & 255);
Data.push_back(unsigned(W >> 32) & 255);
Data.push_back(unsigned(W >> 40) & 255);
Data.push_back(unsigned(W >> 48) & 255);
Data.push_back(unsigned(W >> 56) & 255);
}
/// emitDWordBE - This callback is invoked when a 64-bit word needs to be
/// written to the data stream in big-endian format.
inline void emitDWordBE(uint64_t W) {
Data.push_back(unsigned(W >> 56) & 255);
Data.push_back(unsigned(W >> 48) & 255);
Data.push_back(unsigned(W >> 40) & 255);
Data.push_back(unsigned(W >> 32) & 255);
Data.push_back(unsigned(W >> 24) & 255);
Data.push_back(unsigned(W >> 16) & 255);
Data.push_back(unsigned(W >> 8) & 255);
Data.push_back(unsigned(W >> 0) & 255);
}
/// fixByte - This callback is invoked when a byte needs to be
/// fixed up in the buffer.
inline void fixByte(uint8_t B, uint32_t offset) {
Data[offset] = B;
}
/// fixWord16 - This callback is invoked when a 16-bit word needs to be
/// fixed up in the data stream in the correct endian format.
inline void fixWord16(uint16_t W, uint32_t offset) {
if (IsLittleEndian)
fixWord16LE(W, offset);
else
fixWord16BE(W, offset);
}
/// fixWord16LE - This callback is invoked when a 16-bit word needs to be
/// fixed up in the data stream in little-endian format.
inline void fixWord16LE(uint16_t W, uint32_t offset) {
Data[offset++] = W & 255;
Data[offset] = (W >> 8) & 255;
}
/// fixWord16BE - This callback is invoked when a 16-bit word needs to be
/// fixed up in the data stream in big-endian format.
inline void fixWord16BE(uint16_t W, uint32_t offset) {
Data[offset++] = (W >> 8) & 255;
Data[offset] = W & 255;
}
/// fixWord - This callback is invoked when a word needs to be
/// fixed up in the data in the correct endian format and correct size.
inline void fixWord(uint64_t W, uint32_t offset) {
if (!Is64Bit)
fixWord32(W, offset);
else
fixWord64(W, offset);
}
/// fixWord32 - This callback is invoked when a 32-bit word needs to be
/// fixed up in the data in the correct endian format.
inline void fixWord32(uint32_t W, uint32_t offset) {
if (IsLittleEndian)
fixWord32LE(W, offset);
else
fixWord32BE(W, offset);
}
/// fixWord32LE - This callback is invoked when a 32-bit word needs to be
/// fixed up in the data in little-endian format.
inline void fixWord32LE(uint32_t W, uint32_t offset) {
Data[offset++] = W & 255;
Data[offset++] = (W >> 8) & 255;
Data[offset++] = (W >> 16) & 255;
Data[offset] = (W >> 24) & 255;
}
/// fixWord32BE - This callback is invoked when a 32-bit word needs to be
/// fixed up in the data in big-endian format.
inline void fixWord32BE(uint32_t W, uint32_t offset) {
Data[offset++] = (W >> 24) & 255;
Data[offset++] = (W >> 16) & 255;
Data[offset++] = (W >> 8) & 255;
Data[offset] = W & 255;
}
/// fixWord64 - This callback is invoked when a 64-bit word needs to be
/// fixed up in the data in the correct endian format.
inline void fixWord64(uint64_t W, uint32_t offset) {
if (IsLittleEndian)
fixWord64LE(W, offset);
else
fixWord64BE(W, offset);
}
/// fixWord64LE - This callback is invoked when a 64-bit word needs to be
/// fixed up in the data in little-endian format.
inline void fixWord64LE(uint64_t W, uint32_t offset) {
Data[offset++] = W & 255;
Data[offset++] = (W >> 8) & 255;
Data[offset++] = (W >> 16) & 255;
Data[offset++] = (W >> 24) & 255;
Data[offset++] = (W >> 32) & 255;
Data[offset++] = (W >> 40) & 255;
Data[offset++] = (W >> 48) & 255;
Data[offset] = (W >> 56) & 255;
}
/// fixWord64BE - This callback is invoked when a 64-bit word needs to be
/// fixed up in the data in big-endian format.
inline void fixWord64BE(uint64_t W, uint32_t offset) {
Data[offset++] = (W >> 56) & 255;
Data[offset++] = (W >> 48) & 255;
Data[offset++] = (W >> 40) & 255;
Data[offset++] = (W >> 32) & 255;
Data[offset++] = (W >> 24) & 255;
Data[offset++] = (W >> 16) & 255;
Data[offset++] = (W >> 8) & 255;
Data[offset] = W & 255;
}
/// emitAlignment - Pad the data to the specified alignment.
void emitAlignment(unsigned Alignment) {
if (Alignment <= 1) return;
unsigned PadSize = -Data.size() & (Alignment-1);
for (unsigned i = 0; i<PadSize; ++i)
Data.push_back(0);
}
/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
/// written to the data stream.
void emitULEB128Bytes(uint64_t Value) {
do {
unsigned char Byte = Value & 0x7f;
Value >>= 7;
if (Value) Byte |= 0x80;
emitByte(Byte);
} while (Value);
}
/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
/// written to the data stream.
void emitSLEB128Bytes(int64_t Value) {
int Sign = Value >> (8 * sizeof(Value) - 1);
bool IsMore;
do {
unsigned char Byte = Value & 0x7f;
Value >>= 7;
IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
if (IsMore) Byte |= 0x80;
emitByte(Byte);
} while (IsMore);
}
/// emitString - This callback is invoked when a String needs to be
/// written to the data stream.
void emitString(const std::string &String) {
for (unsigned i = 0, N = static_cast<unsigned>(String.size()); i<N; ++i) {
unsigned char C = String[i];
emitByte(C);
}
emitByte(0);
}
/// getCurrentPCOffset - Return the offset from the start of the emitted
/// buffer that we are currently writing to.
uintptr_t getCurrentPCOffset() const {
return Data.size();
}
/// addRelocation - Whenever a relocatable address is needed, it should be
/// noted with this interface.
void addRelocation(const MachineRelocation& relocation) {
Relocations.push_back(relocation);
}
};
} // end namespace llvm
#endif
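To make the byte-level behavior of the interface above concrete, here is a hedged usage sketch; the function name and the emitted values are hypothetical, chosen only to show what lands in the buffer.

#include "llvm/CodeGen/BinaryObject.h"
using namespace llvm;

void emitExampleBlob() {
  BinaryObject BO("example", /*isLittleEndian=*/true, /*is64Bit=*/false);
  BO.emitByte(0x7f);          // one raw byte
  BO.emitWord16(0x0102);      // little-endian: 0x02 0x01
  BO.emitWord(0xdeadbeef);    // 32-bit mode: 0xef 0xbe 0xad 0xde
  BO.emitULEB128Bytes(300);   // ULEB128 encoding: 0xac 0x02
  BO.emitSLEB128Bytes(-129);  // SLEB128 encoding: 0xff 0x7e
  BO.emitAlignment(4);        // zero-pad to a 4-byte boundary
  BinaryData &Bytes = BO.getData();  // 11 bytes emitted, padded to 12
  (void)Bytes;
}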


@ -97,7 +97,7 @@ class JITCodeEmitter : public MachineCodeEmitter {
/// emitWordLE - This callback is invoked when a 32-bit word needs to be
/// written to the output stream in little-endian format.
///
void emitWordLE(unsigned W) {
void emitWordLE(uint32_t W) {
if (4 <= BufferEnd-CurBufferPtr) {
*CurBufferPtr++ = (uint8_t)(W >> 0);
*CurBufferPtr++ = (uint8_t)(W >> 8);
@ -111,7 +111,7 @@ class JITCodeEmitter : public MachineCodeEmitter {
/// emitWordBE - This callback is invoked when a 32-bit word needs to be
/// written to the output stream in big-endian format.
///
void emitWordBE(unsigned W) {
void emitWordBE(uint32_t W) {
if (4 <= BufferEnd-CurBufferPtr) {
*CurBufferPtr++ = (uint8_t)(W >> 24);
*CurBufferPtr++ = (uint8_t)(W >> 16);
@ -176,7 +176,7 @@ class JITCodeEmitter : public MachineCodeEmitter {
/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
/// written to the output stream.
void emitULEB128Bytes(unsigned Value) {
void emitULEB128Bytes(uint64_t Value) {
do {
uint8_t Byte = Value & 0x7f;
Value >>= 7;
@ -187,7 +187,7 @@ class JITCodeEmitter : public MachineCodeEmitter {
/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
/// written to the output stream.
void emitSLEB128Bytes(int32_t Value) {
void emitSLEB128Bytes(int64_t Value) {
int32_t Sign = Value >> (8 * sizeof(Value) - 1);
bool IsMore;
@ -212,7 +212,7 @@ class JITCodeEmitter : public MachineCodeEmitter {
}
/// emitInt32 - Emit a int32 directive.
void emitInt32(int32_t Value) {
void emitInt32(uint32_t Value) {
if (4 <= BufferEnd-CurBufferPtr) {
*((uint32_t*)CurBufferPtr) = Value;
CurBufferPtr += 4;


@ -0,0 +1,63 @@
//===- LazyLiveness.h - Lazy, CFG-invariant liveness information ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements a lazy liveness analysis as per "Fast Liveness Checking
// for SSA-form Programs," by Boissinot, et al.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_LAZYLIVENESS_H
#define LLVM_CODEGEN_LAZYLIVENESS_H
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SparseBitVector.h"
#include <vector>
namespace llvm {
class MachineRegisterInfo;
class LazyLiveness : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
LazyLiveness() : MachineFunctionPass(&ID) { }
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<MachineDominatorTree>();
}
bool runOnMachineFunction(MachineFunction &mf);
bool vregLiveIntoMBB(unsigned vreg, MachineBasicBlock* MBB);
private:
void computeBackedgeChain(MachineFunction& mf, MachineBasicBlock* MBB);
typedef std::pair<MachineBasicBlock*, MachineBasicBlock*> edge_t;
MachineRegisterInfo* MRI;
DenseMap<MachineBasicBlock*, unsigned> preorder;
std::vector<MachineBasicBlock*> rev_preorder;
DenseMap<MachineBasicBlock*, SparseBitVector<128> > rv;
DenseMap<MachineBasicBlock*, SparseBitVector<128> > tv;
DenseSet<edge_t> backedges;
SparseBitVector<128> backedge_source;
SparseBitVector<128> backedge_target;
SparseBitVector<128> calculated;
};
}
#endif
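To show how the analysis above is meant to be consumed, here is a hypothetical client pass; the pass name and the queried register number are assumptions for the example, not part of the patch.

#include "llvm/CodeGen/LazyLiveness.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
  struct LivenessQueryPass : public MachineFunctionPass {
    static char ID;
    LivenessQueryPass() : MachineFunctionPass(&ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesAll();
      AU.addRequired<LazyLiveness>();   // schedule LazyLiveness first
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &MF) {
      LazyLiveness &LL = getAnalysis<LazyLiveness>();
      // 1024 is the first virtual register number in this era's numbering.
      bool Live = LL.vregLiveIntoMBB(1024, &*MF.begin());
      (void)Live;
      return false;  // read-only client; the function is unchanged
    }
  };
  char LivenessQueryPass::ID = 0;
}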


@ -104,7 +104,7 @@ class MachineCodeEmitter {
/// emitWordLE - This callback is invoked when a 32-bit word needs to be
/// written to the output stream in little-endian format.
///
void emitWordLE(unsigned W) {
void emitWordLE(uint32_t W) {
if (4 <= BufferEnd-CurBufferPtr) {
*CurBufferPtr++ = (uint8_t)(W >> 0);
*CurBufferPtr++ = (uint8_t)(W >> 8);
@ -118,7 +118,7 @@ class MachineCodeEmitter {
/// emitWordBE - This callback is invoked when a 32-bit word needs to be
/// written to the output stream in big-endian format.
///
void emitWordBE(unsigned W) {
void emitWordBE(uint32_t W) {
if (4 <= BufferEnd-CurBufferPtr) {
*CurBufferPtr++ = (uint8_t)(W >> 24);
*CurBufferPtr++ = (uint8_t)(W >> 16);
@ -183,7 +183,7 @@ class MachineCodeEmitter {
/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
/// written to the output stream.
void emitULEB128Bytes(unsigned Value) {
void emitULEB128Bytes(uint64_t Value) {
do {
uint8_t Byte = Value & 0x7f;
Value >>= 7;
@ -194,8 +194,8 @@ class MachineCodeEmitter {
/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
/// written to the output stream.
void emitSLEB128Bytes(int32_t Value) {
int32_t Sign = Value >> (8 * sizeof(Value) - 1);
void emitSLEB128Bytes(uint64_t Value) {
uint64_t Sign = Value >> (8 * sizeof(Value) - 1);
bool IsMore;
do {


@ -243,7 +243,7 @@ class ExecutionEngine {
}
// The JIT overrides a version that actually does this.
virtual void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0) { }
virtual void runJITOnFunction(Function *, MachineCodeInfo * = 0) { }
/// getGlobalValueAtAddress - Return the LLVM global value object that starts
/// at the specified address.


@ -395,6 +395,10 @@ class Function : public GlobalValue, public Annotable,
/// including any contained basic blocks.
///
void dropAllReferences();
/// hasAddressTaken - returns true if there are any uses of this function
/// other than direct calls or invokes to it.
bool hasAddressTaken() const;
};
inline ValueSymbolTable *
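The semantics of hasAddressTaken declared above can be sketched as follows; this helper is a hypothetical, simplified illustration written against this era's API, not the implementation the patch adds.

#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Support/CallSite.h"
using namespace llvm;

static bool addressIsTaken(const Function *F) {
  for (Value::use_const_iterator UI = F->use_begin(), E = F->use_end();
       UI != E; ++UI) {
    const User *U = *UI;
    // Any user other than a call or invoke instruction takes the address.
    if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
      return true;
    // A call or invoke also takes the address if F appears among the
    // arguments rather than (only) as the callee.
    CallSite CS(const_cast<Instruction*>(cast<Instruction>(U)));
    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
         AI != AE; ++AI)
      if (*AI == F)
        return true;
  }
  return false;
}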


@ -127,6 +127,7 @@ namespace {
(void) llvm::createPrintModulePass(0);
(void) llvm::createPrintFunctionPass("", 0);
(void) llvm::createDbgInfoPrinterPass();
(void) llvm::createPartialInliningPass();
(void)new llvm::IntervalPartition();
(void)new llvm::FindUsedTypes();


@ -15,15 +15,21 @@
#define LLVM_TARGET_TARGETELFWRITERINFO_H
namespace llvm {
class Function;
class TargetData;
class TargetMachine;
//===--------------------------------------------------------------------===//
// TargetELFWriterInfo
//===--------------------------------------------------------------------===//
class TargetELFWriterInfo {
protected:
// EMachine - This field is the target specific value to emit as the
// e_machine member of the ELF header.
unsigned short EMachine;
TargetMachine &TM;
bool is64Bit, isLittleEndian;
public:
// Machine architectures
@ -44,10 +50,39 @@ namespace llvm {
EM_X86_64 = 62 // AMD64
};
explicit TargetELFWriterInfo(MachineType machine) : EMachine(machine) {}
virtual ~TargetELFWriterInfo() {}
// ELF File classes
enum {
ELFCLASS32 = 1, // 32-bit object file
ELFCLASS64 = 2 // 64-bit object file
};
// ELF Endianness
enum {
ELFDATA2LSB = 1, // Little-endian object file
ELFDATA2MSB = 2 // Big-endian object file
};
explicit TargetELFWriterInfo(TargetMachine &tm);
virtual ~TargetELFWriterInfo();
unsigned short getEMachine() const { return EMachine; }
unsigned getEFlags() const { return 0; }
unsigned getEIClass() const { return is64Bit ? ELFCLASS64 : ELFCLASS32; }
unsigned getEIData() const {
return isLittleEndian ? ELFDATA2LSB : ELFDATA2MSB;
}
/// ELF Header and ELF Section Header Info
unsigned getHdrSize() const { return is64Bit ? 64 : 52; }
unsigned getSHdrSize() const { return is64Bit ? 64 : 40; }
/// Symbol Table Info
unsigned getSymTabEntrySize() const { return is64Bit ? 24 : 16; }
unsigned getSymTabAlignment() const { return is64Bit ? 8 : 4; }
/// getFunctionAlignment - Returns the alignment for function 'F'. Targets
/// with different alignment constraints should override this method.
virtual unsigned getFunctionAlignment(const Function *F) const;
};
} // end llvm namespace


@ -350,7 +350,7 @@ class TargetLowering {
LegalizeAction getOperationAction(unsigned Op, MVT VT) const {
if (VT.isExtended()) return Expand;
assert(Op < array_lengthof(OpActions) &&
(unsigned)VT.getSimpleVT() < sizeof(OpActions[0])*4 &&
(unsigned)VT.getSimpleVT() < sizeof(OpActions[0])*8 &&
"Table isn't big enough!");
return (LegalizeAction)((OpActions[Op] >> (2*VT.getSimpleVT())) & 3);
}
@ -417,11 +417,10 @@ class TargetLowering {
/// for it.
LegalizeAction
getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
assert(IdxMode < array_lengthof(IndexedModeActions[0]) &&
(unsigned)VT.getSimpleVT() < sizeof(IndexedModeActions[0][0])*4 &&
assert( IdxMode < array_lengthof(IndexedModeActions[0][0]) &&
((unsigned)VT.getSimpleVT()) < MVT::LAST_VALUETYPE &&
"Table isn't big enough!");
return (LegalizeAction)((IndexedModeActions[0][IdxMode] >>
(2*VT.getSimpleVT())) & 3);
return (LegalizeAction)((IndexedModeActions[(unsigned)VT.getSimpleVT()][0][IdxMode]));
}
/// isIndexedLoadLegal - Return true if the specified indexed load is legal
@ -438,11 +437,10 @@ class TargetLowering {
/// for it.
LegalizeAction
getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
assert(IdxMode < array_lengthof(IndexedModeActions[1]) &&
(unsigned)VT.getSimpleVT() < sizeof(IndexedModeActions[1][0])*4 &&
assert(IdxMode < array_lengthof(IndexedModeActions[0][1]) &&
(unsigned)VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
"Table isn't big enough!");
return (LegalizeAction)((IndexedModeActions[1][IdxMode] >>
(2*VT.getSimpleVT())) & 3);
return (LegalizeAction)((IndexedModeActions[(unsigned)VT.getSimpleVT()][1][IdxMode]));
}
/// isIndexedStoreLegal - Return true if the specified indexed load is legal
@ -942,7 +940,7 @@ class TargetLowering {
/// with the specified type and indicate what to do about it.
void setOperationAction(unsigned Op, MVT VT,
LegalizeAction Action) {
assert((unsigned)VT.getSimpleVT() < sizeof(OpActions[0])*4 &&
assert((unsigned)VT.getSimpleVT() < sizeof(OpActions[0])*8 &&
Op < array_lengthof(OpActions) && "Table isn't big enough!");
OpActions[Op] &= ~(uint64_t(3UL) << VT.getSimpleVT()*2);
OpActions[Op] |= (uint64_t)Action << VT.getSimpleVT()*2;
@ -978,11 +976,10 @@ class TargetLowering {
/// TargetLowering.cpp
void setIndexedLoadAction(unsigned IdxMode, MVT VT,
LegalizeAction Action) {
assert((unsigned)VT.getSimpleVT() < sizeof(IndexedModeActions[0])*4 &&
IdxMode < array_lengthof(IndexedModeActions[0]) &&
assert((unsigned)VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
IdxMode < array_lengthof(IndexedModeActions[0][0]) &&
"Table isn't big enough!");
IndexedModeActions[0][IdxMode] &= ~(uint64_t(3UL) << VT.getSimpleVT()*2);
IndexedModeActions[0][IdxMode] |= (uint64_t)Action << VT.getSimpleVT()*2;
IndexedModeActions[(unsigned)VT.getSimpleVT()][0][IdxMode] = (uint8_t)Action;
}
/// setIndexedStoreAction - Indicate that the specified indexed store does or
@ -991,11 +988,10 @@ class TargetLowering {
/// TargetLowering.cpp
void setIndexedStoreAction(unsigned IdxMode, MVT VT,
LegalizeAction Action) {
assert((unsigned)VT.getSimpleVT() < sizeof(IndexedModeActions[1][0])*4 &&
IdxMode < array_lengthof(IndexedModeActions[1]) &&
assert((unsigned)VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
IdxMode < array_lengthof(IndexedModeActions[0][1] ) &&
"Table isn't big enough!");
IndexedModeActions[1][IdxMode] &= ~(uint64_t(3UL) << VT.getSimpleVT()*2);
IndexedModeActions[1][IdxMode] |= (uint64_t)Action << VT.getSimpleVT()*2;
IndexedModeActions[(unsigned)VT.getSimpleVT()][1][IdxMode] = (uint8_t)Action;
}
/// setConvertAction - Indicate that the specified conversion does or does
@ -1581,10 +1577,13 @@ class TargetLowering {
/// indicates how instruction selection should deal with the store.
uint64_t TruncStoreActions[MVT::LAST_VALUETYPE];
/// IndexedModeActions - For each indexed mode and each value type, keep a
/// pair of LegalizeAction that indicates how instruction selection should
/// deal with the load / store.
uint64_t IndexedModeActions[2][ISD::LAST_INDEXED_MODE];
/// IndexedModeActions - For each indexed mode and each value type, keep a
/// LegalizeAction that indicates how instruction selection should deal
/// with the load / store. The first dimension is the value type of the
/// reference. The second dimension selects load [0] vs. store [1]. The
/// third dimension is the indexed mode.
uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][2][ISD::LAST_INDEXED_MODE];
/// ConvertActions - For each conversion from source type to destination type,
/// keep a LegalizeAction that indicates how instruction selection should
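The new storage scheme replaces two bits packed into a shared uint64_t with one byte per entry, indexed by value type first. A standalone mock of how the setter and getter line up (array sizes and enum values are assumptions, not the real MVT/ISD constants):

#include <cassert>
#include <stdint.h>

enum LegalizeAction { Legal, Promote, Expand, Custom };
enum { NUM_VALUE_TYPES = 64, NUM_INDEXED_MODES = 5 };

static uint8_t IndexedModeActions[NUM_VALUE_TYPES][2][NUM_INDEXED_MODES];

// [0] selects loads, [1] selects stores, mirroring the patch.
static void setIndexedLoadAction(unsigned IdxMode, unsigned VT,
                                 LegalizeAction Action) {
  assert(VT < NUM_VALUE_TYPES && IdxMode < NUM_INDEXED_MODES);
  IndexedModeActions[VT][0][IdxMode] = (uint8_t)Action;
}

static LegalizeAction getIndexedLoadAction(unsigned IdxMode, unsigned VT) {
  assert(VT < NUM_VALUE_TYPES && IdxMode < NUM_INDEXED_MODES);
  return (LegalizeAction)IndexedModeActions[VT][0][IdxMode];
}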


@ -78,6 +78,14 @@ namespace CodeGenOpt {
};
}
namespace FloatABI {
enum ABIType {
Default,
Soft,
Hard
};
}
//===----------------------------------------------------------------------===//
///
/// TargetMachine - Primary interface to the complete machine description for
@ -88,7 +96,7 @@ class TargetMachine {
TargetMachine(const TargetMachine &); // DO NOT IMPLEMENT
void operator=(const TargetMachine &); // DO NOT IMPLEMENT
protected: // Can only create subclasses.
TargetMachine() : AsmInfo(0) { }
TargetMachine();
/// getSubtargetImpl - virtual method implemented by subclasses that returns
/// a reference to that target's TargetSubtarget-derived member variable.


@ -73,6 +73,14 @@ namespace llvm {
/// target FP instructions.
extern bool UseSoftFloat;
/// FloatABIType - This setting is set when the -float-abi=xxx option is
/// specified on the command line. This setting may be Default, Soft, or Hard.
/// Default selects the target's default behavior. Soft selects the ABI for
/// UseSoftFloat, but does not indicate that FP hardware may not be used.
/// Such a combination is unfortunately popular (e.g. arm-apple-darwin).
/// Hard presumes that the normal FP ABI is used.
extern FloatABI::ABIType FloatABIType;
/// NoZerosInBSS - By default some codegens place zero-initialized data to
/// .bss section. This flag disables such behaviour (necessary, e.g. for
/// crt*.o compiling).
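A minimal sketch of how a backend might consume the new setting, assuming only the TargetOptions globals shown above; the helper itself is hypothetical.

#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static bool useSoftFloatCalls() {
  switch (FloatABIType) {
  case FloatABI::Soft: return true;   // force the soft-float ABI
  case FloatABI::Hard: return false;  // normal FP ABI
  case FloatABI::Default: break;      // defer to the target
  }
  // Default: assume the hard ABI unless soft-float was also requested.
  return UseSoftFloat;
}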


@ -228,6 +228,7 @@ class SDNode<string opcode, SDTypeProfile typeprof,
SDTypeProfile TypeProfile = typeprof;
}
// Special TableGen-recognized dag nodes
def set;
def implicit;
def parallel;


@ -214,6 +214,11 @@ Pass *createFunctionAttrsPass();
///
ModulePass *createMergeFunctionsPass();
//===----------------------------------------------------------------------===//
/// createPartialInliningPass - This pass inlines parts of functions.
///
ModulePass *createPartialInliningPass();
} // End llvm namespace
#endif


@ -937,6 +937,48 @@ SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op,
return Result;
}
/// getAnyExtendExpr - Return a SCEV for the given operand extended with
/// unspecified bits out to the given type.
///
SCEVHandle ScalarEvolution::getAnyExtendExpr(const SCEVHandle &Op,
const Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
Ty = getEffectiveSCEVType(Ty);
// Sign-extend negative constants.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
if (SC->getValue()->getValue().isNegative())
return getSignExtendExpr(Op, Ty);
// Peel off a truncate cast.
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
SCEVHandle NewOp = T->getOperand();
if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
return getAnyExtendExpr(NewOp, Ty);
return getTruncateOrNoop(NewOp, Ty);
}
// Next try a zext cast. If the cast is folded, use it.
SCEVHandle ZExt = getZeroExtendExpr(Op, Ty);
if (!isa<SCEVZeroExtendExpr>(ZExt))
return ZExt;
// Next try a sext cast. If the cast is folded, use it.
SCEVHandle SExt = getSignExtendExpr(Op, Ty);
if (!isa<SCEVSignExtendExpr>(SExt))
return SExt;
// If the expression is obviously signed, use the sext cast value.
if (isa<SCEVSMaxExpr>(Op))
return SExt;
// Absent any other information, use the zext cast value.
return ZExt;
}
/// getAddExpr - Get a canonical add expression, or something simpler if
/// possible.
SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
@ -1903,6 +1945,23 @@ ScalarEvolution::getNoopOrSignExtend(const SCEVHandle &V, const Type *Ty) {
return getSignExtendExpr(V, Ty);
}
/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
/// the input value to the specified type. If the type must be extended,
/// it is extended with unspecified bits. The conversion must not be
/// narrowing.
SCEVHandle
ScalarEvolution::getNoopOrAnyExtend(const SCEVHandle &V, const Type *Ty) {
const Type *SrcTy = V->getType();
assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
(Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
"Cannot noop or any extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrAnyExtend cannot truncate!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
return getAnyExtendExpr(V, Ty);
}
/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. The conversion must not be widening.
SCEVHandle


@ -16,6 +16,7 @@
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
/// InsertCastOfTo - Insert a cast of V to the specified type, doing what
@ -319,8 +320,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
if (!AnyNonZeroIndices) {
V = InsertNoopCastOfTo(V,
Type::Int8Ty->getPointerTo(PTy->getAddressSpace()));
Value *Idx = expand(SE.getAddExpr(Ops));
Idx = InsertNoopCastOfTo(Idx, Ty);
Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
// Fold a GEP with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(V))
@ -374,8 +374,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// Emit a bunch of add instructions
for (int i = S->getNumOperands()-2; i >= 0; --i) {
Value *W = expand(S->getOperand(i));
W = InsertNoopCastOfTo(W, Ty);
Value *W = expandCodeFor(S->getOperand(i), Ty);
V = InsertBinop(Instruction::Add, V, W, InsertPt);
}
return V;
@ -389,13 +388,11 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
FirstOp = 1;
int i = S->getNumOperands()-2;
Value *V = expand(S->getOperand(i+1));
V = InsertNoopCastOfTo(V, Ty);
Value *V = expandCodeFor(S->getOperand(i+1), Ty);
// Emit a bunch of multiply instructions
for (; i >= FirstOp; --i) {
Value *W = expand(S->getOperand(i));
W = InsertNoopCastOfTo(W, Ty);
Value *W = expandCodeFor(S->getOperand(i), Ty);
V = InsertBinop(Instruction::Mul, V, W, InsertPt);
}
@ -408,8 +405,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
Value *LHS = expand(S->getLHS());
LHS = InsertNoopCastOfTo(LHS, Ty);
Value *LHS = expandCodeFor(S->getLHS(), Ty);
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
const APInt &RHS = SC->getValue()->getValue();
if (RHS.isPowerOf2())
@ -418,8 +414,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
InsertPt);
}
Value *RHS = expand(S->getRHS());
RHS = InsertNoopCastOfTo(RHS, Ty);
Value *RHS = expandCodeFor(S->getRHS(), Ty);
return InsertBinop(Instruction::UDiv, LHS, RHS, InsertPt);
}
@ -448,6 +443,34 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
const Loop *L = S->getLoop();
// First check for an existing canonical IV in a suitable type.
PHINode *CanonicalIV = 0;
if (PHINode *PN = L->getCanonicalInductionVariable())
if (SE.isSCEVable(PN->getType()) &&
isa<IntegerType>(SE.getEffectiveSCEVType(PN->getType())) &&
SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
CanonicalIV = PN;
// Rewrite an AddRec in terms of the canonical induction variable, if
// its type is more narrow.
if (CanonicalIV &&
SE.getTypeSizeInBits(CanonicalIV->getType()) >
SE.getTypeSizeInBits(Ty)) {
SCEVHandle Start = SE.getAnyExtendExpr(S->getStart(),
CanonicalIV->getType());
SCEVHandle Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE),
CanonicalIV->getType());
Value *V = expand(SE.getAddRecExpr(Start, Step, S->getLoop()));
BasicBlock::iterator SaveInsertPt = getInsertionPoint();
BasicBlock::iterator NewInsertPt =
next(BasicBlock::iterator(cast<Instruction>(V)));
while (isa<PHINode>(NewInsertPt)) ++NewInsertPt;
V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
NewInsertPt);
setInsertionPoint(SaveInsertPt);
return V;
}
// {X,+,F} --> X + {0,+,F}
if (!S->getStart()->isZero()) {
std::vector<SCEVHandle> NewOps(S->getOperands());
@ -481,6 +504,14 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// {0,+,1} --> Insert a canonical induction variable into the loop!
if (S->isAffine() &&
S->getOperand(1) == SE.getIntegerSCEV(1, Ty)) {
// If there's a canonical IV, just use it.
if (CanonicalIV) {
assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
"IVs with types different from the canonical IV should "
"already have been handled!");
return CanonicalIV;
}
// Create and insert the PHI node for the induction variable in the
// specified loop.
BasicBlock *Header = L->getHeader();
@ -508,19 +539,16 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
return PN;
}
// {0,+,F} --> {0,+,1} * F
// Get the canonical induction variable I for this loop.
Value *I = getOrInsertCanonicalInductionVariable(L, Ty);
Value *I = CanonicalIV ?
CanonicalIV :
getOrInsertCanonicalInductionVariable(L, Ty);
// If this is a simple linear addrec, emit it now as a special case.
if (S->isAffine()) { // {0,+,F} --> i*F
Value *F = expand(S->getOperand(1));
F = InsertNoopCastOfTo(F, Ty);
// IF the step is by one, just return the inserted IV.
if (ConstantInt *CI = dyn_cast<ConstantInt>(F))
if (CI->getValue() == 1)
return I;
Value *F = expandCodeFor(S->getOperand(1), Ty);
// If the insert point is directly inside of the loop, emit the multiply at
// the insert point. Otherwise, L is a loop that is a parent of the insert
// point loop. If we can, move the multiply to the outer most loop that it
@ -555,16 +583,24 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// into this folder.
SCEVHandle IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV.
SCEVHandle V = S->evaluateAtIteration(IH, SE);
// Promote S up to the canonical IV type, if the cast is foldable.
SCEVHandle NewS = S;
SCEVHandle Ext = SE.getNoopOrAnyExtend(S, I->getType());
if (isa<SCEVAddRecExpr>(Ext))
NewS = Ext;
SCEVHandle V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
//cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";
// Truncate the result down to the original type, if needed.
SCEVHandle T = SE.getTruncateOrNoop(V, Ty);
return expand(T);
}
Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
Value *V = expand(S->getOperand());
V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType()));
Value *V = expandCodeFor(S->getOperand(),
SE.getEffectiveSCEVType(S->getOperand()->getType()));
Instruction *I = new TruncInst(V, Ty, "tmp.", InsertPt);
InsertedValues.insert(I);
return I;
@ -572,8 +608,8 @@ Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
Value *V = expand(S->getOperand());
V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType()));
Value *V = expandCodeFor(S->getOperand(),
SE.getEffectiveSCEVType(S->getOperand()->getType()));
Instruction *I = new ZExtInst(V, Ty, "tmp.", InsertPt);
InsertedValues.insert(I);
return I;
@ -581,8 +617,8 @@ Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
Value *V = expand(S->getOperand());
V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType()));
Value *V = expandCodeFor(S->getOperand(),
SE.getEffectiveSCEVType(S->getOperand()->getType()));
Instruction *I = new SExtInst(V, Ty, "tmp.", InsertPt);
InsertedValues.insert(I);
return I;
@ -590,11 +626,9 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
Value *LHS = expand(S->getOperand(0));
LHS = InsertNoopCastOfTo(LHS, Ty);
Value *LHS = expandCodeFor(S->getOperand(0), Ty);
for (unsigned i = 1; i < S->getNumOperands(); ++i) {
Value *RHS = expand(S->getOperand(i));
RHS = InsertNoopCastOfTo(RHS, Ty);
Value *RHS = expandCodeFor(S->getOperand(i), Ty);
Instruction *ICmp =
new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS, "tmp", InsertPt);
InsertedValues.insert(ICmp);
@ -607,11 +641,9 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
Value *LHS = expand(S->getOperand(0));
LHS = InsertNoopCastOfTo(LHS, Ty);
Value *LHS = expandCodeFor(S->getOperand(0), Ty);
for (unsigned i = 1; i < S->getNumOperands(); ++i) {
Value *RHS = expand(S->getOperand(i));
RHS = InsertNoopCastOfTo(RHS, Ty);
Value *RHS = expandCodeFor(S->getOperand(i), Ty);
Instruction *ICmp =
new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS, "tmp", InsertPt);
InsertedValues.insert(ICmp);


@ -167,10 +167,11 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where,
mbr->data = 0;
mbr->path = filePath;
const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg);
if (FSInfo)
mbr->info = *FSInfo;
else
if (!FSInfo) {
delete mbr;
return true;
}
mbr->info = *FSInfo;
unsigned flags = 0;
bool hasSlash = filePath.toString().find('/') != std::string::npos;


@ -1308,16 +1308,6 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
// Emit constants.
WriteModuleConstants(VE, Stream);
// If we have any aggregate values in the value table, purge them - these can
// only be used to initialize global variables. Doing so makes the value
// namespace smaller for code in functions.
int NumNonAggregates = VE.PurgeAggregateValues();
if (NumNonAggregates != -1) {
SmallVector<unsigned, 1> Vals;
Vals.push_back(NumNonAggregates);
Stream.EmitRecord(bitc::MODULE_CODE_PURGEVALS, Vals);
}
// Emit function bodies.
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
if (!I->isDeclaration())


@ -277,22 +277,6 @@ void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
}
/// PurgeAggregateValues - If there are any aggregate values at the end of the
/// value list, remove them and return the count of the remaining values. If
/// there are none, return -1.
int ValueEnumerator::PurgeAggregateValues() {
// If there are no aggregate values at the end of the list, return -1.
if (Values.empty() || Values.back().first->getType()->isSingleValueType())
return -1;
// Otherwise, remove aggregate values...
while (!Values.empty() && !Values.back().first->getType()->isSingleValueType())
Values.pop_back();
// ... and return the new size.
return Values.size();
}
void ValueEnumerator::incorporateFunction(const Function &F) {
NumModuleValues = Values.size();


@ -99,11 +99,6 @@ class ValueEnumerator {
return Attributes;
}
/// PurgeAggregateValues - If there are any aggregate values at the end of the
/// value list, remove them and return the count of the remaining values. If
/// there are none, return -1.
int PurgeAggregateValues();
/// incorporateFunction/purgeFunction - If you'd like to deal with a function,
/// use these two methods to get its data into the ValueEnumerator!
///


@ -1581,6 +1581,7 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
FunctionDbgScope = NULL;
LexicalScopeStack.clear();
AbstractInstanceRootList.clear();
AbstractInstanceRootMap.clear();
}
Lines.clear();
@ -1669,7 +1670,11 @@ unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) {
DbgScope *Scope = getOrCreateScope(V);
unsigned ID = MMI->NextLabelID();
Scope->setEndLabelID(ID);
if (LexicalScopeStack.size() != 0)
// FIXME : region.end() may not be in the last basic block.
// For now, do not pop last lexical scope because next basic
// block may start new inlined function's body.
unsigned LSSize = LexicalScopeStack.size();
if (LSSize != 0 && LSSize != 1)
LexicalScopeStack.pop_back();
if (TimePassesIsEnabled)


@ -12,6 +12,7 @@ add_llvm_library(LLVMCodeGen
IntrinsicLowering.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
LazyLiveness.cpp
LiveInterval.cpp
LiveIntervalAnalysis.cpp
LiveStackAnalysis.cpp


@ -10,23 +10,24 @@
// This header contains common, non-processor-specific data structures and
// constants for the ELF file format.
//
// The details of the ELF32 bits in this file are largely based on
// the Tool Interface Standard (TIS) Executable and Linking Format
// (ELF) Specification Version 1.2, May 1995. The ELF64 stuff is not
// standardized, as far as I can tell. It was largely based on information
// I found in OpenBSD header files.
// The details of the ELF32 bits in this file are largely based on the Tool
// Interface Standard (TIS) Executable and Linking Format (ELF) Specification
// Version 1.2, May 1995. The ELF64 bits are based on the HP/Intel
// definition of the ELF-64 object file format, Version 1.5, Draft 2,
// May 27, 1998.
//
//===----------------------------------------------------------------------===//
#ifndef CODEGEN_ELF_H
#define CODEGEN_ELF_H
#include "llvm/GlobalVariable.h"
#include "llvm/CodeGen/BinaryObject.h"
#include "llvm/CodeGen/MachineRelocation.h"
#include "llvm/Support/DataTypes.h"
#include <cstring>
namespace llvm {
class GlobalVariable;
class BinaryObject;
// Identification Indexes
enum {
@ -47,71 +48,28 @@ namespace llvm {
ET_HIPROC = 0xffff // Processor-specific
};
// Object file classes.
enum {
ELFCLASS32 = 1, // 32-bit object file
ELFCLASS64 = 2 // 64-bit object file
};
// Object file byte orderings.
enum {
ELFDATA2LSB = 1, // Little-endian object file
ELFDATA2MSB = 2 // Big-endian object file
};
// Versioning
enum {
EV_NONE = 0,
EV_CURRENT = 1
};
struct ELFHeader {
// e_machine - This field is the target specific value to emit as the
// e_machine member of the ELF header.
unsigned short e_machine;
// e_flags - The machine flags for the target. This defaults to zero.
unsigned e_flags;
// e_size - Holds the ELF header's size in bytes
unsigned e_ehsize;
// Endianess and ELF Class (64 or 32 bits)
unsigned ByteOrder;
unsigned ElfClass;
unsigned getByteOrder() const { return ByteOrder; }
unsigned getElfClass() const { return ElfClass; }
unsigned getSize() const { return e_ehsize; }
unsigned getMachine() const { return e_machine; }
unsigned getFlags() const { return e_flags; }
ELFHeader(unsigned short machine, unsigned flags,
bool is64Bit, bool isLittleEndian)
: e_machine(machine), e_flags(flags) {
ElfClass = is64Bit ? ELFCLASS64 : ELFCLASS32;
ByteOrder = isLittleEndian ? ELFDATA2LSB : ELFDATA2MSB;
e_ehsize = is64Bit ? 64 : 52;
}
};
/// ELFSection - This struct contains information about each section that is
/// emitted to the file. This is eventually turned into the section header
/// table at the end of the file.
struct ELFSection {
class ELFSection : public BinaryObject {
public:
// ELF specific fields
std::string Name; // Name of the section.
unsigned NameIdx; // Index in .shstrtab of name, once emitted.
unsigned Type;
unsigned Flags;
uint64_t Addr;
unsigned Offset;
unsigned Size;
unsigned Link;
unsigned Info;
unsigned Align;
unsigned EntSize;
unsigned NameIdx; // sh_name - .shstrtab idx of name, once emitted.
unsigned Type; // sh_type - Section contents & semantics
unsigned Flags; // sh_flags - Section flags.
uint64_t Addr; // sh_addr - The mem addr this section is in.
unsigned Offset; // sh_offset - Offset from the file start
unsigned Size; // sh_size - The section size.
unsigned Link; // sh_link - Section header table index link.
unsigned Info; // sh_info - Auxiliary information.
unsigned Align; // sh_addralign - Alignment of section.
unsigned EntSize; // sh_entsize - Size of entries in the section
// Section Header Flags
enum {
@ -141,8 +99,8 @@ namespace llvm {
SHT_REL = 9, // Relocation entries; no explicit addends.
SHT_SHLIB = 10, // Reserved.
SHT_DYNSYM = 11, // Symbol table.
SHT_LOPROC = 0x70000000, // Lowest processor architecture-specific type.
SHT_HIPROC = 0x7fffffff, // Highest processor architecture-specific type.
SHT_LOPROC = 0x70000000, // Lowest processor arch-specific type.
SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type.
SHT_LOUSER = 0x80000000, // Lowest type reserved for applications.
SHT_HIUSER = 0xffffffff // Highest type reserved for applications.
};
@ -161,22 +119,9 @@ namespace llvm {
/// SectionIdx - The number of the section in the Section Table.
unsigned short SectionIdx;
/// SectionData - The actual data for this section which we are building
/// up for emission to the file.
std::vector<unsigned char> SectionData;
/// Relocations - The relocations that we have encountered so far in this
/// section that we will need to convert to Elf relocation entries when
/// the file is written.
std::vector<MachineRelocation> Relocations;
/// Section Header Size
static unsigned getSectionHdrSize(bool is64Bit)
{ return is64Bit ? 64 : 40; }
ELFSection(const std::string &name)
: Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0),
Link(0), Info(0), Align(0), EntSize(0) {}
ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit)
: BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0),
Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0) {}
};
/// ELFSym - This struct contains information about each symbol that is
@ -207,9 +152,33 @@ namespace llvm {
STT_FILE = 4
};
enum {
STV_DEFAULT = 0, // Visibility is specified by binding type
STV_INTERNAL = 1, // Defined by processor supplements
STV_HIDDEN = 2, // Not visible to other components
STV_PROTECTED = 3 // Visible in other components but not preemptable
};
ELFSym(const GlobalValue *gv) : GV(gv), NameIdx(0), Value(0),
Size(0), Info(0), Other(0),
SectionIdx(ELFSection::SHN_UNDEF) {}
SectionIdx(ELFSection::SHN_UNDEF) {
if (!GV)
return;
switch (GV->getVisibility()) {
default:
assert(0 && "unknown visibility type");
case GlobalValue::DefaultVisibility:
Other = STV_DEFAULT;
break;
case GlobalValue::HiddenVisibility:
Other = STV_HIDDEN;
break;
case GlobalValue::ProtectedVisibility:
Other = STV_PROTECTED;
break;
}
}
void SetBind(unsigned X) {
assert(X == (X & 0xF) && "Bind value out of range!");


@ -13,9 +13,9 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/BinaryObject.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
@ -28,27 +28,22 @@ namespace llvm {
/// startFunction - This callback is invoked when a new machine function is
/// about to be emitted.
void ELFCodeEmitter::startFunction(MachineFunction &MF) {
const TargetData *TD = TM.getTargetData();
const Function *F = MF.getFunction();
// Align the output buffer to the appropriate alignment, power of 2.
unsigned FnAlign = F->getAlignment();
unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
unsigned Align = std::max(FnAlign, TDAlign);
assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
// Get the ELF Section that this function belongs in.
ES = &EW.getTextSection();
DOUT << "processing function: " << MF.getFunction()->getName() << "\n";
// FIXME: better memory management, this will be replaced by BinaryObjects
ES->SectionData.reserve(4096);
BufferBegin = &ES->SectionData[0];
BufferEnd = BufferBegin + ES->SectionData.capacity();
BinaryData &BD = ES->getData();
BD.reserve(4096);
BufferBegin = &BD[0];
BufferEnd = BufferBegin + BD.capacity();
// Upgrade the section alignment if required.
// Align the output buffer with function alignment, and
// upgrade the section alignment if required
unsigned Align =
TM.getELFWriterInfo()->getFunctionAlignment(MF.getFunction());
if (ES->Align < Align) ES->Align = Align;
// Round the size up to the correct alignment for starting the new function.
ES->Size = (ES->Size + (Align-1)) & (-Align);
// Sanity check on allocated space for text section
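Aside: the round-up above, (Size + (Align-1)) & (-Align), advances Size to the next multiple of a power-of-two Align. A minimal standalone check of that idiom, for illustration only (not part of the patch):

#include <cassert>

int main() {
  unsigned Align = 16;
  // On unsigned values, -Align is the two's-complement mask ~(Align-1).
  assert(((0u  + (Align-1)) & -Align) == 0u);
  assert(((1u  + (Align-1)) & -Align) == 16u);
  assert(((16u + (Align-1)) & -Align) == 16u);
  assert(((17u + (Align-1)) & -Align) == 32u);
  return 0;
}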
@ -107,7 +102,7 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
FnSym.Value = FnStartPtr-BufferBegin;
// Finally, add it to the symtab.
EW.SymbolTable.push_back(FnSym);
EW.SymbolList.push_back(FnSym);
// Relocations
// -----------
@ -128,7 +123,7 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
} else {
assert(0 && "Unhandled relocation type");
}
ES->Relocations.push_back(MR);
ES->addRelocation(MR);
}
Relocations.clear();

View File

@ -26,9 +26,6 @@
// ...
// #N. ".shstrtab" entry - String table for the section names.
//
// NOTE: This code should eventually be extended to support 64-bit ELF (this
// won't be hard), but we haven't done so yet!
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "elfwriter"
@ -36,18 +33,18 @@
#include "ELFWriter.h"
#include "ELFCodeEmitter.h"
#include "ELF.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/BinaryObject.h"
#include "llvm/CodeGen/FileWriters.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetELFWriterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/OutputBuffer.h"
#include "llvm/Support/Streams.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Debug.h"
@ -70,21 +67,23 @@ MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM,
//===----------------------------------------------------------------------===//
ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
: MachineFunctionPass(&ID), O(o), TM(tm), ElfHdr() {
is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
isLittleEndian = TM.getTargetData()->isLittleEndian();
: MachineFunctionPass(&ID), O(o), TM(tm),
is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
isLittleEndian(TM.getTargetData()->isLittleEndian()),
ElfHdr(isLittleEndian, is64Bit) {
ElfHdr = new ELFHeader(TM.getELFWriterInfo()->getEMachine(), 0,
is64Bit, isLittleEndian);
TAI = TM.getTargetAsmInfo();
TEW = TM.getELFWriterInfo();
// Create the machine code emitter object for this target.
MCE = new ELFCodeEmitter(*this);
// Initial number of sections
NumSections = 0;
}
ELFWriter::~ELFWriter() {
delete MCE;
delete ElfHdr;
}
// doInitialization - Emit the file header and all of the global variables for
@ -92,10 +91,6 @@ ELFWriter::~ELFWriter() {
bool ELFWriter::doInitialization(Module &M) {
Mang = new Mangler(M);
// Local alias to shorten the following code.
std::vector<unsigned char> &FH = FileHeader;
OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
// ELF Header
// ----------
// Fields e_shnum e_shstrndx are only known after all section have
@ -104,54 +99,58 @@ bool ELFWriter::doInitialization(Module &M) {
//
// Note
// ----
// FHOut.outaddr method behaves differently for ELF32 and ELF64 writing
// emitWord method behaves differently for ELF32 and ELF64, writing
// 4 bytes in the former and 8 in the latter for *_off and *_addr elf types
FHOut.outbyte(0x7f); // e_ident[EI_MAG0]
FHOut.outbyte('E'); // e_ident[EI_MAG1]
FHOut.outbyte('L'); // e_ident[EI_MAG2]
FHOut.outbyte('F'); // e_ident[EI_MAG3]
ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0]
ElfHdr.emitByte('E'); // e_ident[EI_MAG1]
ElfHdr.emitByte('L'); // e_ident[EI_MAG2]
ElfHdr.emitByte('F'); // e_ident[EI_MAG3]
FHOut.outbyte(ElfHdr->getElfClass()); // e_ident[EI_CLASS]
FHOut.outbyte(ElfHdr->getByteOrder()); // e_ident[EI_DATA]
FHOut.outbyte(EV_CURRENT); // e_ident[EI_VERSION]
ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS]
ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA]
ElfHdr.emitByte(EV_CURRENT); // e_ident[EI_VERSION]
ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD]
FH.resize(16); // e_ident[EI_NIDENT-EI_PAD]
FHOut.outhalf(ET_REL); // e_type
FHOut.outhalf(ElfHdr->getMachine()); // e_machine = target
FHOut.outword(EV_CURRENT); // e_version
FHOut.outaddr(0); // e_entry = 0, no entry point in .o file
FHOut.outaddr(0); // e_phoff = 0, no program header for .o
ELFHdr_e_shoff_Offset = FH.size();
FHOut.outaddr(0); // e_shoff = sec hdr table off in bytes
FHOut.outword(ElfHdr->getFlags()); // e_flags = whatever the target wants
FHOut.outhalf(ElfHdr->getSize()); // e_ehsize = ELF header size
FHOut.outhalf(0); // e_phentsize = prog header entry size
FHOut.outhalf(0); // e_phnum = # prog header entries = 0
ElfHdr.emitWord16(ET_REL); // e_type
ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target
ElfHdr.emitWord32(EV_CURRENT); // e_version
ElfHdr.emitWord(0); // e_entry, no entry point in .o file
ElfHdr.emitWord(0); // e_phoff, no program header for .o
ELFHdr_e_shoff_Offset = ElfHdr.size();
ElfHdr.emitWord(0); // e_shoff = sec hdr table off in bytes
ElfHdr.emitWord32(TEW->getEFlags()); // e_flags = whatever the target wants
ElfHdr.emitWord16(TEW->getHdrSize()); // e_ehsize = ELF header size
ElfHdr.emitWord16(0); // e_phentsize = prog header entry size
ElfHdr.emitWord16(0); // e_phnum = # prog header entries = 0
// e_shentsize = Section header entry size
FHOut.outhalf(ELFSection::getSectionHdrSize(is64Bit));
ElfHdr.emitWord16(TEW->getSHdrSize());
// e_shnum = # of section header ents
ELFHdr_e_shnum_Offset = FH.size();
FHOut.outhalf(0);
ELFHdr_e_shnum_Offset = ElfHdr.size();
ElfHdr.emitWord16(0); // Placeholder
// e_shstrndx = Section # of '.shstrtab'
ELFHdr_e_shstrndx_Offset = FH.size();
FHOut.outhalf(0);
ELFHdr_e_shstrndx_Offset = ElfHdr.size();
ElfHdr.emitWord16(0); // Placeholder
// Add the null section, which is required to be first in the file.
getSection("", ELFSection::SHT_NULL, 0);
// Start up the symbol table. The first entry in the symtab is the null
// Start up the symbol table. The first entry in the symtab is the null
// entry.
SymbolTable.push_back(ELFSym(0));
SymbolList.push_back(ELFSym(0));
return false;
}
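For reference, the identification bytes emitted above follow the fixed e_ident layout of the ELF spec (EI_NIDENT = 16 bytes). A small self-contained sketch of just that prefix, with a plain byte vector standing in for the BinaryObject-backed ElfHdr (an assumption for illustration; the constant values are the spec's):

#include <cassert>
#include <vector>

int main() {
  std::vector<unsigned char> Ident;
  Ident.push_back(0x7f); // EI_MAG0
  Ident.push_back('E');  // EI_MAG1
  Ident.push_back('L');  // EI_MAG2
  Ident.push_back('F');  // EI_MAG3
  Ident.push_back(1);    // EI_CLASS: 1 = ELFCLASS32, 2 = ELFCLASS64
  Ident.push_back(1);    // EI_DATA:  1 = little endian, 2 = big endian
  Ident.push_back(1);    // EI_VERSION = EV_CURRENT
  Ident.resize(16, 0);   // zero-pad through EI_NIDENT
  assert(Ident.size() == 16);
  return 0;
}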
void ELFWriter::EmitGlobal(GlobalVariable *GV) {
// XXX: put local symbols *before* global ones!
const Section *S = TAI->SectionForGlobal(GV);
DOUT << "Section " << S->getName() << " for global " << GV->getName() << "\n";
// If this is an external global, emit it now. TODO: Note that it would be
// better to ignore the symbol here and only add it to the symbol table if
// referenced.
@ -160,17 +159,17 @@ void ELFWriter::EmitGlobal(GlobalVariable *GV) {
ExternalSym.SetBind(ELFSym::STB_GLOBAL);
ExternalSym.SetType(ELFSym::STT_NOTYPE);
ExternalSym.SectionIdx = ELFSection::SHN_UNDEF;
SymbolTable.push_back(ExternalSym);
SymbolList.push_back(ExternalSym);
return;
}
unsigned Align = TM.getTargetData()->getPreferredAlignment(GV);
unsigned Size =
TM.getTargetData()->getTypeAllocSize(GV->getType()->getElementType());
const TargetData *TD = TM.getTargetData();
unsigned Align = TD->getPreferredAlignment(GV);
Constant *CV = GV->getInitializer();
unsigned Size = TD->getTypeAllocSize(CV->getType());
// If this global has a zero initializer, it is part of the .bss or common
// section.
if (GV->getInitializer()->isNullValue()) {
// If this global has a zero initializer, go to .bss or common section.
if (CV->isNullValue() || isa<UndefValue>(CV)) {
// If this global is part of the common block, add it now. Variables are
// part of the common block if they are zero initialized and allowed to be
// merged with other symbols.
@ -182,14 +181,14 @@ void ELFWriter::EmitGlobal(GlobalVariable *GV) {
CommonSym.Size = Size;
CommonSym.SetBind(ELFSym::STB_GLOBAL);
CommonSym.SetType(ELFSym::STT_OBJECT);
// TODO SOMEDAY: add ELF visibility.
CommonSym.SectionIdx = ELFSection::SHN_COMMON;
SymbolTable.push_back(CommonSym);
SymbolList.push_back(CommonSym);
getSection(S->getName(), ELFSection::SHT_NOBITS,
ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 1);
return;
}
// Otherwise, this symbol is part of the .bss section. Emit it now.
// Handle alignment. Ensure section is aligned at least as much as required
// by this symbol.
ELFSection &BSSSection = getBSSSection();
@ -220,18 +219,128 @@ void ELFWriter::EmitGlobal(GlobalVariable *GV) {
// Set the idx of the .bss section
BSSSym.SectionIdx = BSSSection.SectionIdx;
if (!GV->hasPrivateLinkage())
SymbolTable.push_back(BSSSym);
SymbolList.push_back(BSSSym);
// Reserve space in the .bss section for this symbol.
BSSSection.Size += Size;
return;
}
// FIXME: handle .rodata
//assert(!GV->isConstant() && "unimp");
/// Emit the Global symbol to the right ELF section
ELFSym GblSym(GV);
GblSym.Size = Size;
GblSym.SetType(ELFSym::STT_OBJECT);
GblSym.SetBind(ELFSym::STB_GLOBAL);
unsigned Flags = S->getFlags();
unsigned SectType = ELFSection::SHT_PROGBITS;
unsigned SHdrFlags = ELFSection::SHF_ALLOC;
// FIXME: handle .data
//assert(0 && "unimp");
if (Flags & SectionFlags::Code)
SHdrFlags |= ELFSection::SHF_EXECINSTR;
if (Flags & SectionFlags::Writeable)
SHdrFlags |= ELFSection::SHF_WRITE;
if (Flags & SectionFlags::Mergeable)
SHdrFlags |= ELFSection::SHF_MERGE;
if (Flags & SectionFlags::TLS)
SHdrFlags |= ELFSection::SHF_TLS;
if (Flags & SectionFlags::Strings)
SHdrFlags |= ELFSection::SHF_STRINGS;
// Remove tab from section name prefix
std::string SectionName(S->getName());
size_t Pos = SectionName.find("\t");
if (Pos != std::string::npos)
SectionName.erase(Pos, 1);
// The section alignment should be bound to the element with
// the largest alignment
ELFSection &ElfS = getSection(SectionName, SectType, SHdrFlags);
GblSym.SectionIdx = ElfS.SectionIdx;
if (Align > ElfS.Align)
ElfS.Align = Align;
// S.Value should contain the symbol index inside the section,
// and all symbols should start on their required alignment boundary
GblSym.Value = (ElfS.size() + (Align-1)) & (-Align);
ElfS.emitAlignment(Align);
// Emit the constant symbol to its section
EmitGlobalConstant(CV, ElfS);
SymbolList.push_back(GblSym);
}
void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
ELFSection &GblS) {
// Print the fields in successive locations. Pad to align if needed!
const TargetData *TD = TM.getTargetData();
unsigned Size = TD->getTypeAllocSize(CVS->getType());
const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
uint64_t sizeSoFar = 0;
for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
const Constant* field = CVS->getOperand(i);
// Check if padding is needed and insert one or more 0s.
uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
- cvsLayout->getElementOffset(i)) - fieldSize;
sizeSoFar += fieldSize + padSize;
// Now print the actual field value.
EmitGlobalConstant(field, GblS);
// Insert padding - this may include padding to increase the size of the
// current field up to the ABI size (if the struct is not packed) as well
// as padding to ensure that the next field starts at the right offset.
for (unsigned p=0; p < padSize; p++)
GblS.emitByte(0);
}
assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
"Layout of constant struct may be incorrect!");
}
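The padding arithmetic above can be checked by hand on a small case. A standalone sketch, assuming a hypothetical layout for struct { char c; int i; } where the int sits at offset 4 and the struct is 8 bytes overall:

#include <cassert>
#include <stdint.h>

int main() {
  const uint64_t Size = 8;                 // total struct size
  const uint64_t ElementOffset[] = {0, 4}; // per-field offsets (StructLayout)
  const uint64_t FieldSize[] = {1, 4};     // per-field alloc sizes
  const unsigned e = 2;

  uint64_t sizeSoFar = 0;
  for (unsigned i = 0; i != e; ++i) {
    // Offset of the next field (or the struct end for the last field),
    // minus this field's start and its own size, gives the padding.
    uint64_t next = (i == e-1) ? Size : ElementOffset[i+1];
    uint64_t padSize = (next - ElementOffset[i]) - FieldSize[i];
    sizeSoFar += FieldSize[i] + padSize;
    // EmitGlobalConstant(field) would go here, then padSize zero bytes.
  }
  assert(sizeSoFar == Size && "layout arithmetic mismatch");
  return 0;
}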
void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
const TargetData *TD = TM.getTargetData();
unsigned Size = TD->getTypeAllocSize(CV->getType());
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
if (CVA->isString()) {
std::string GblStr = CVA->getAsString();
GblS.emitString(GblStr);
} else { // Not a string. Print the values in successive locations
for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
EmitGlobalConstant(CVA->getOperand(i), GblS);
}
return;
} else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
EmitGlobalConstantStruct(CVS, GblS);
return;
} else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
if (CFP->getType() == Type::DoubleTy)
GblS.emitWord64(Val);
else if (CFP->getType() == Type::FloatTy)
GblS.emitWord32(Val);
else if (CFP->getType() == Type::X86_FP80Ty) {
assert(0 && "X86_FP80Ty global emission not implemented");
} else if (CFP->getType() == Type::PPC_FP128Ty)
assert(0 && "PPC_FP128Ty global emission not implemented");
return;
} else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
if (Size == 4)
GblS.emitWord32(CI->getZExtValue());
else if (Size == 8)
GblS.emitWord64(CI->getZExtValue());
else
assert(0 && "LargeInt global emission not implemented");
return;
} else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
const VectorType *PTy = CP->getType();
for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
EmitGlobalConstant(CP->getOperand(I), GblS);
return;
}
assert(0 && "unknown global constant");
}
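The ConstantFP branch above writes the raw IEEE-754 bit pattern of the value. A self-contained illustration of that bit-level view, using memcpy in place of APFloat::bitcastToAPInt (a stand-in for illustration, not the writer's own code path):

#include <cassert>
#include <stdint.h>
#include <string.h>

int main() {
  double D = 1.0;
  uint64_t Bits;
  memcpy(&Bits, &D, sizeof(Bits)); // same bytes, reinterpreted
  assert(Bits == 0x3FF0000000000000ULL && "IEEE-754 encoding of 1.0");
  return 0;
}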
@ -243,22 +352,41 @@ bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
/// doFinalization - Now that the module has been completely processed, emit
/// the ELF file to 'O'.
bool ELFWriter::doFinalization(Module &M) {
// Okay, the ELF header and .text sections have been completed, build the
// .data, .bss, and "common" sections next.
/// FIXME: This should be removed when moving to ObjectCodeEmitter. Since the
/// current ELFCodeEmitter uses CurrBuff, ... it doesn't update S.Data
/// vector size for .text sections, so this is a quick and dirty fix
ELFSection &TS = getTextSection();
if (TS.Size) {
BinaryData &BD = TS.getData();
for (unsigned e=0; e<TS.Size; ++e)
BD.push_back(BD[e]);
}
// Emit .data section placeholder
getDataSection();
// Emit .bss section placeholder
getBSSSection();
// Build and emit data, bss and "common" sections.
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I)
EmitGlobal(I);
// Emit non-executable stack note
if (TAI->getNonexecutableStackDirective())
getNonExecStackSection();
// Emit the symbol table now, if non-empty.
EmitSymbolTable();
// Emit the relocation sections.
EmitRelocations();
// Emit the string table for the sections in the ELF file.
// Emit the sections string table.
EmitSectionTableStringTable();
// Emit the sections to the .o file, and emit the section table for the file.
// Dump the sections and section table to the .o file.
OutputSectionsAndSectionTable();
// We are done with the abstract symbols.
@ -274,78 +402,97 @@ bool ELFWriter::doFinalization(Module &M) {
void ELFWriter::EmitRelocations() {
}
/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable'
void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) {
if (is64Bit) {
SymbolTable.emitWord32(Sym.NameIdx);
SymbolTable.emitByte(Sym.Info);
SymbolTable.emitByte(Sym.Other);
SymbolTable.emitWord16(Sym.SectionIdx);
SymbolTable.emitWord64(Sym.Value);
SymbolTable.emitWord64(Sym.Size);
} else {
SymbolTable.emitWord32(Sym.NameIdx);
SymbolTable.emitWord32(Sym.Value);
SymbolTable.emitWord32(Sym.Size);
SymbolTable.emitByte(Sym.Info);
SymbolTable.emitByte(Sym.Other);
SymbolTable.emitWord16(Sym.SectionIdx);
}
}
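The two field orders emitted above mirror the Elf32_Sym and Elf64_Sym layouts; summing the field widths gives the per-entry sizes the writer later records in SymTab.EntSize. A quick arithmetic check:

#include <cassert>

int main() {
  const unsigned Elf32Sym = 4 /*name*/ + 4 /*value*/ + 4 /*size*/
                          + 1 /*info*/ + 1 /*other*/ + 2 /*shndx*/;
  const unsigned Elf64Sym = 4 /*name*/ + 1 /*info*/ + 1 /*other*/
                          + 2 /*shndx*/ + 8 /*value*/ + 8 /*size*/;
  assert(Elf32Sym == 16 && Elf64Sym == 24);
  return 0;
}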
/// EmitSectionHeader - Write section 'Section' header in 'SHdrTab'
/// Section Header Table
void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab,
const ELFSection &SHdr) {
SHdrTab.emitWord32(SHdr.NameIdx);
SHdrTab.emitWord32(SHdr.Type);
if (is64Bit) {
SHdrTab.emitWord64(SHdr.Flags);
SHdrTab.emitWord(SHdr.Addr);
SHdrTab.emitWord(SHdr.Offset);
SHdrTab.emitWord64(SHdr.Size);
SHdrTab.emitWord32(SHdr.Link);
SHdrTab.emitWord32(SHdr.Info);
SHdrTab.emitWord64(SHdr.Align);
SHdrTab.emitWord64(SHdr.EntSize);
} else {
SHdrTab.emitWord32(SHdr.Flags);
SHdrTab.emitWord(SHdr.Addr);
SHdrTab.emitWord(SHdr.Offset);
SHdrTab.emitWord32(SHdr.Size);
SHdrTab.emitWord32(SHdr.Link);
SHdrTab.emitWord32(SHdr.Info);
SHdrTab.emitWord32(SHdr.Align);
SHdrTab.emitWord32(SHdr.EntSize);
}
}
/// EmitSymbolTable - If the current symbol table is non-empty, emit the string
/// table for it and then the symbol table itself.
void ELFWriter::EmitSymbolTable() {
if (SymbolTable.size() == 1) return; // Only the null entry.
if (SymbolList.size() == 1) return; // Only the null entry.
// FIXME: compact all local symbols to the start of the symtab.
unsigned FirstNonLocalSymbol = 1;
ELFSection &StrTab = getSection(".strtab", ELFSection::SHT_STRTAB, 0);
StrTab.Align = 1;
DataBuffer &StrTabBuf = StrTab.SectionData;
OutputBuffer StrTabOut(StrTabBuf, is64Bit, isLittleEndian);
ELFSection &StrTab = getStringTableSection();
// Set the zero'th symbol to a null byte, as required.
StrTabOut.outbyte(0);
StrTab.emitByte(0);
unsigned Index = 1;
for (unsigned i = 1, e = SymbolTable.size(); i != e; ++i) {
for (unsigned i = 1, e = SymbolList.size(); i != e; ++i) {
// Use the name mangler to uniquify the LLVM symbol.
std::string Name = Mang->getValueName(SymbolTable[i].GV);
std::string Name = Mang->getValueName(SymbolList[i].GV);
if (Name.empty()) {
SymbolTable[i].NameIdx = 0;
SymbolList[i].NameIdx = 0;
} else {
SymbolTable[i].NameIdx = Index;
// Add the name to the output buffer, including the null terminator.
StrTabBuf.insert(StrTabBuf.end(), Name.begin(), Name.end());
// Add a null terminator.
StrTabBuf.push_back(0);
SymbolList[i].NameIdx = Index;
StrTab.emitString(Name);
// Keep track of the number of bytes emitted to this section.
Index += Name.size()+1;
}
}
assert(Index == StrTabBuf.size());
assert(Index == StrTab.size());
StrTab.Size = Index;
// Now that we have emitted the string table and know the offset into the
// string table of each symbol, emit the symbol table itself.
ELFSection &SymTab = getSection(".symtab", ELFSection::SHT_SYMTAB, 0);
SymTab.Align = is64Bit ? 8 : 4;
SymTab.Link = StrTab.SectionIdx; // Section Index of .strtab.
SymTab.Info = FirstNonLocalSymbol; // First non-STB_LOCAL symbol.
SymTab.EntSize = is64Bit ? 24 : 16; // Size of each symtab entry.
DataBuffer &SymTabBuf = SymTab.SectionData;
OutputBuffer SymTabOut(SymTabBuf, is64Bit, isLittleEndian);
ELFSection &SymTab = getSymbolTableSection();
SymTab.Align = TEW->getSymTabAlignment();
SymTab.Link = StrTab.SectionIdx; // Section Index of .strtab.
SymTab.Info = FirstNonLocalSymbol; // First non-STB_LOCAL symbol.
if (!is64Bit) { // 32-bit and 64-bit formats are shuffled a bit.
for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
ELFSym &Sym = SymbolTable[i];
SymTabOut.outword(Sym.NameIdx);
SymTabOut.outaddr32(Sym.Value);
SymTabOut.outword(Sym.Size);
SymTabOut.outbyte(Sym.Info);
SymTabOut.outbyte(Sym.Other);
SymTabOut.outhalf(Sym.SectionIdx);
}
} else {
for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
ELFSym &Sym = SymbolTable[i];
SymTabOut.outword(Sym.NameIdx);
SymTabOut.outbyte(Sym.Info);
SymTabOut.outbyte(Sym.Other);
SymTabOut.outhalf(Sym.SectionIdx);
SymTabOut.outaddr64(Sym.Value);
SymTabOut.outxword(Sym.Size);
}
}
// Size of each symtab entry.
SymTab.EntSize = TEW->getSymTabEntrySize();
SymTab.Size = SymTabBuf.size();
for (unsigned i = 0, e = SymbolList.size(); i != e; ++i)
EmitSymbol(SymTab, SymbolList[i]);
SymTab.Size = SymTab.size();
}
/// EmitSectionTableStringTable - This method adds and emits a section for the
@ -357,32 +504,25 @@ void ELFWriter::EmitSectionTableStringTable() {
// Now that we know which section number is the .shstrtab section, update the
// e_shstrndx entry in the ELF header.
OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
FHOut.fixhalf(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
// Set the NameIdx of each section in the string table and emit the bytes for
// the string table.
unsigned Index = 0;
DataBuffer &Buf = SHStrTab.SectionData;
for (std::list<ELFSection>::iterator I = SectionList.begin(),
E = SectionList.end(); I != E; ++I) {
// Set the index into the table. Note if we have lots of entries with
// common suffixes, we could memoize them here if we cared.
I->NameIdx = Index;
// Add the name to the output buffer, including the null terminator.
Buf.insert(Buf.end(), I->Name.begin(), I->Name.end());
// Add a null terminator.
Buf.push_back(0);
SHStrTab.emitString(I->getName());
// Keep track of the number of bytes emitted to this section.
Index += I->Name.size()+1;
Index += I->getName().size()+1;
}
// Set the size of .shstrtab now that we know what it is.
assert(Index == Buf.size());
assert(Index == SHStrTab.size());
SHStrTab.Size = Index;
}
@ -391,9 +531,9 @@ void ELFWriter::EmitSectionTableStringTable() {
/// SectionTable.
void ELFWriter::OutputSectionsAndSectionTable() {
// Pass #1: Compute the file offset for each section.
size_t FileOff = FileHeader.size(); // File header first.
size_t FileOff = ElfHdr.size(); // File header first.
// Emit all of the section data in order.
// Adjust alignment of all sections if needed.
for (std::list<ELFSection>::iterator I = SectionList.begin(),
E = SectionList.end(); I != E; ++I) {
@ -401,9 +541,14 @@ void ELFWriter::OutputSectionsAndSectionTable() {
if (!I->SectionIdx)
continue;
if (!I->size()) {
I->Offset = FileOff;
continue;
}
// Update Section size
if (!I->Size)
I->Size = I->SectionData.size();
I->Size = I->size();
// Align FileOff to whatever the alignment restrictions of the section are.
if (I->Align)
@ -419,49 +564,40 @@ void ELFWriter::OutputSectionsAndSectionTable() {
// Now that we know where all of the sections will be emitted, set the e_shnum
// entry in the ELF header.
OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
FHOut.fixhalf(NumSections, ELFHdr_e_shnum_Offset);
ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset);
// Now that we know the offset in the file of the section table, update the
// e_shoff address in the ELF header.
FHOut.fixaddr(FileOff, ELFHdr_e_shoff_Offset);
ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset);
// Now that we know all of the data in the file header, emit it and all of the
// sections!
O.write((char*)&FileHeader[0], FileHeader.size());
FileOff = FileHeader.size();
DataBuffer().swap(FileHeader);
O.write((char *)&ElfHdr.getData()[0], ElfHdr.size());
FileOff = ElfHdr.size();
DataBuffer Table;
OutputBuffer TableOut(Table, is64Bit, isLittleEndian);
// Section Header Table blob
BinaryObject SHdrTable(isLittleEndian, is64Bit);
// Emit all of the section data and build the section table itself.
// Emit all of the sections to the file and build the section header table.
while (!SectionList.empty()) {
const ELFSection &S = *SectionList.begin();
ELFSection &S = *SectionList.begin();
DOUT << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
<< ", Size: " << S.Size << ", Offset: " << S.Offset
<< ", SectionData Size: " << S.size() << "\n";
// Align FileOff to whatever the alignment restrictions of the section are.
if (S.Align)
if (S.Align) {
for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
FileOff != NewFileOff; ++FileOff)
FileOff != NewFileOff; ++FileOff)
O << (char)0xAB;
O.write((char*)&S.SectionData[0], S.Size);
}
DOUT << "SectionIdx: " << S.SectionIdx << ", Name: " << S.Name
<< ", Size: " << S.Size << ", Offset: " << S.Offset << "\n";
FileOff += S.Size;
TableOut.outword(S.NameIdx); // sh_name - Symbol table name idx
TableOut.outword(S.Type); // sh_type - Section contents & semantics
TableOut.outaddr(S.Flags); // sh_flags - Section flags.
TableOut.outaddr(S.Addr); // sh_addr - The mem addr this section is in.
TableOut.outaddr(S.Offset); // sh_offset - Offset from the file start.
TableOut.outaddr(S.Size); // sh_size - The section size.
TableOut.outword(S.Link); // sh_link - Section header table index link.
TableOut.outword(S.Info); // sh_info - Auxiliary information.
TableOut.outaddr(S.Align); // sh_addralign - Alignment of section.
TableOut.outaddr(S.EntSize); // sh_entsize - Size of entries in the section
if (S.size()) {
O.write((char *)&S.getData()[0], S.Size);
FileOff += S.Size;
}
EmitSectionHeader(SHdrTable, S);
SectionList.pop_front();
}
@ -471,5 +607,5 @@ void ELFWriter::OutputSectionsAndSectionTable() {
O << (char)0xAB;
// Emit the section table itself.
O.write((char*)&Table[0], Table.size());
O.write((char *)&SHdrTable.getData()[0], SHdrTable.size());
}

View File

@ -16,15 +16,20 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/OutputBuffer.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetELFWriterInfo.h"
#include "ELF.h"
#include <list>
#include <map>
namespace llvm {
class BinaryObject;
class ConstantStruct;
class ELFCodeEmitter;
class GlobalVariable;
class Mangler;
class MachineCodeEmitter;
class ELFCodeEmitter;
class raw_ostream;
/// ELFWriter - This class implements the common target-independent code for
@ -52,6 +57,9 @@ namespace llvm {
/// Target machine description.
TargetMachine &TM;
/// Target Elf Writer description.
const TargetELFWriterInfo *TEW;
/// Mang - The object used to perform name mangling for this module.
Mangler *Mang;
@ -59,6 +67,10 @@ namespace llvm {
/// code for functions to the .o file.
ELFCodeEmitter *MCE;
/// TAI - Target Asm Info, provide information about section names for
/// globals and other target specific stuff.
const TargetAsmInfo *TAI;
//===------------------------------------------------------------------===//
// Properties inferred automatically from the target machine.
//===------------------------------------------------------------------===//
@ -77,13 +89,8 @@ namespace llvm {
bool doFinalization(Module &M);
private:
// The buffer we accumulate the file header into. Note that this should be
// changed into something much more efficient later (and the bitcode writer
// as well!).
DataBuffer FileHeader;
/// ElfHdr - Hold information about the ELF Header
ELFHeader *ElfHdr;
// Blob containing the Elf header
BinaryObject ElfHdr;
/// SectionList - This is the list of sections that we have emitted to the
/// file. Once the file has been completely built, the section header table
@ -97,17 +104,18 @@ namespace llvm {
/// getSection - Return the section with the specified name, creating a new
/// section if one does not already exist.
ELFSection &getSection(const std::string &Name,
unsigned Type, unsigned Flags = 0) {
ELFSection &getSection(const std::string &Name, unsigned Type,
unsigned Flags = 0, unsigned Align = 0) {
ELFSection *&SN = SectionLookup[Name];
if (SN) return *SN;
SectionList.push_back(Name);
SectionList.push_back(ELFSection(Name, isLittleEndian, is64Bit));
SN = &SectionList.back();
SN->SectionIdx = NumSections++;
SN->Type = Type;
SN->Flags = Flags;
SN->Link = ELFSection::SHN_UNDEF;
SN->Align = Align;
return *SN;
}
@ -116,23 +124,36 @@ namespace llvm {
ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC);
}
ELFSection &getNonExecStackSection() {
return getSection(".note.GNU-stack", ELFSection::SHT_PROGBITS, 0, 1);
}
ELFSection &getSymbolTableSection() {
return getSection(".symtab", ELFSection::SHT_SYMTAB, 0);
}
ELFSection &getStringTableSection() {
return getSection(".strtab", ELFSection::SHT_STRTAB, 0, 1);
}
ELFSection &getDataSection() {
return getSection(".data", ELFSection::SHT_PROGBITS,
ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
}
ELFSection &getBSSSection() {
return getSection(".bss", ELFSection::SHT_NOBITS,
ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
}
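The getSection helper above is a lookup-or-create over a name map whose values point into a std::list, which never relocates elements as it grows. A standalone sketch of that idiom (the names here are illustrative, not the writer's):

#include <list>
#include <map>
#include <string>

struct Section { std::string Name; unsigned Idx; };

static std::list<Section> Sections;            // stable addresses
static std::map<std::string, Section*> Lookup;
static unsigned NumSections = 0;

Section &getOrCreate(const std::string &Name) {
  Section *&SN = Lookup[Name]; // reference into the map slot
  if (SN) return *SN;          // hit: return the existing section
  Section S = { Name, NumSections++ };
  Sections.push_back(S);
  SN = &Sections.back();       // list nodes never move, pointer stays valid
  return *SN;
}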
/// SymbolTable - This is the list of symbols we have emitted to the file.
/// SymbolList - This is the list of symbols we have emitted to the file.
/// This actually gets rearranged before emission to the file (to put the
/// local symbols first in the list).
std::vector<ELFSym> SymbolTable;
std::vector<ELFSym> SymbolList;
/// PendingSyms - This is a list of externally defined symbols that we have
/// been asked to emit, but have not seen a reference to. When a reference
/// is seen, the symbol will move from this list to the SymbolTable.
/// PendingGlobals - List of externally defined symbols that we have been
/// asked to emit, but have not seen a reference to. When a reference
/// is seen, the symbol will move from this list to the SymbolList.
SetVector<GlobalValue*> PendingGlobals;
// As we complete the ELF file, we need to update fields in the ELF header
@ -142,11 +163,17 @@ namespace llvm {
unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header.
unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header.
unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header.
private:
void EmitGlobal(GlobalVariable *GV);
void EmitSymbolTable();
void EmitGlobalConstant(const Constant *C, ELFSection &GblS);
void EmitGlobalConstantStruct(const ConstantStruct *CVS,
ELFSection &GblS);
void EmitRelocations();
void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
void EmitSectionTableStringTable();
void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym);
void EmitSymbolTable();
void OutputSectionsAndSectionTable();
};
}

View File

@ -240,7 +240,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None) {
PM.add(createMachineLICMPass());
PM.add(createMachineSinkingPass());
printAndVerify(PM, /* allowDoubleDefs= */ true);
printAndVerify(PM, /* allowDoubleDefs= */ false);
}
// Run pre-ra passes.

View File

@ -0,0 +1,158 @@
//===- LazyLiveness.cpp - Lazy, CFG-invariant liveness information --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements a lazy liveness analysis as per "Fast Liveness Checking
// for SSA-form Programs," by Boissinot, et al.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "lazyliveness"
#include "llvm/CodeGen/LazyLiveness.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
using namespace llvm;
char LazyLiveness::ID = 0;
static RegisterPass<LazyLiveness> X("lazy-liveness", "Lazy Liveness Analysis");
void LazyLiveness::computeBackedgeChain(MachineFunction& mf,
MachineBasicBlock* MBB) {
SparseBitVector<128> tmp = rv[MBB];
tmp.set(preorder[MBB]);
tmp &= backedge_source;
calculated.set(preorder[MBB]);
for (SparseBitVector<128>::iterator I = tmp.begin(); I != tmp.end(); ++I) {
MachineBasicBlock* SrcMBB = rev_preorder[*I];
for (MachineBasicBlock::succ_iterator SI = SrcMBB->succ_begin();
SI != SrcMBB->succ_end(); ++SI) {
MachineBasicBlock* TgtMBB = *SI;
if (backedges.count(std::make_pair(SrcMBB, TgtMBB)) &&
!rv[MBB].test(preorder[TgtMBB])) {
if (!calculated.test(preorder[TgtMBB]))
computeBackedgeChain(mf, TgtMBB);
tv[MBB].set(preorder[TgtMBB]);
tv[MBB] |= tv[TgtMBB];
}
}
tv[MBB].reset(preorder[MBB]);
}
}
bool LazyLiveness::runOnMachineFunction(MachineFunction &mf) {
rv.clear();
tv.clear();
backedges.clear();
backedge_source.clear();
backedge_target.clear();
calculated.clear();
preorder.clear();
MRI = &mf.getRegInfo();
MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
// Step 0: Compute preorder numbering for all MBBs.
unsigned num = 0;
for (df_iterator<MachineDomTreeNode*> DI = df_begin(MDT.getRootNode()),
DE = df_end(MDT.getRootNode()); DI != DE; ++DI) {
preorder[(*DI)->getBlock()] = num++;
rev_preorder.push_back((*DI)->getBlock());
}
// Step 1: Compute the transitive closure of the CFG, ignoring backedges.
for (po_iterator<MachineBasicBlock*> POI = po_begin(&*mf.begin()),
POE = po_end(&*mf.begin()); POI != POE; ++POI) {
MachineBasicBlock* MBB = *POI;
SparseBitVector<128>& entry = rv[MBB];
entry.set(preorder[MBB]);
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
SE = MBB->succ_end(); SI != SE; ++SI) {
DenseMap<MachineBasicBlock*, SparseBitVector<128> >::iterator SII =
rv.find(*SI);
// Because we're iterating in postorder, any successor that does not yet
// have an rv entry must be on a backedge.
if (SII != rv.end()) {
entry |= SII->second;
} else {
backedges.insert(std::make_pair(MBB, *SI));
backedge_source.set(preorder[MBB]);
backedge_target.set(preorder[*SI]);
}
}
}
for (SparseBitVector<128>::iterator I = backedge_source.begin();
I != backedge_source.end(); ++I)
computeBackedgeChain(mf, rev_preorder[*I]);
for (po_iterator<MachineBasicBlock*> POI = po_begin(&*mf.begin()),
POE = po_end(&*mf.begin()); POI != POE; ++POI)
if (!backedge_target.test(preorder[*POI]))
for (MachineBasicBlock::succ_iterator SI = (*POI)->succ_begin(),
SE = (*POI)->succ_end(); SI != SE; ++SI)
if (!backedges.count(std::make_pair(*POI, *SI)) && tv.count(*SI)) {
SparseBitVector<128>& PBV = tv[*POI];
PBV = tv[*SI];
}
for (po_iterator<MachineBasicBlock*> POI = po_begin(&*mf.begin()),
POE = po_end(&*mf.begin()); POI != POE; ++POI)
tv[*POI].set(preorder[*POI]);
return false;
}
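Step 1 above builds reachability by OR-ing successor bit vectors in postorder, so every non-backedge successor is already complete when its predecessor is visited. A toy version on a diamond CFG, with std::bitset standing in for SparseBitVector (an assumption made for illustration):

#include <bitset>
#include <cassert>
#include <vector>

int main() {
  // Diamond CFG, no backedges: 0 -> {1,2}, 1 -> {3}, 2 -> {3}.
  std::vector<std::vector<unsigned> > Succs(4);
  Succs[0].push_back(1); Succs[0].push_back(2);
  Succs[1].push_back(3); Succs[2].push_back(3);

  std::vector<std::bitset<8> > rv(4);
  const unsigned PO[] = {3, 1, 2, 0}; // a postorder: successors first
  for (unsigned p = 0; p != 4; ++p) {
    unsigned B = PO[p];
    rv[B].set(B);
    for (unsigned s = 0; s != Succs[B].size(); ++s)
      rv[B] |= rv[Succs[B][s]]; // successor already finished in postorder
  }
  assert(rv[0].test(3) && rv[1].test(3) && !rv[3].test(0));
  return 0;
}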
bool LazyLiveness::vregLiveIntoMBB(unsigned vreg, MachineBasicBlock* MBB) {
MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
MachineBasicBlock* DefMBB = MRI->def_begin(vreg)->getParent();
unsigned def = preorder[DefMBB];
unsigned max_dom = 0;
for (df_iterator<MachineDomTreeNode*> DI = df_begin(MDT[DefMBB]),
DE = df_end(MDT[DefMBB]); DI != DE; ++DI) {
if (preorder[DI->getBlock()] > max_dom) {
max_dom = preorder[(*DI)->getBlock()];
}
}
if (preorder[MBB] <= def || max_dom < preorder[MBB])
return false;
SparseBitVector<128>::iterator I = tv[MBB].begin();
while (I != tv[MBB].end() && *I <= def) ++I;
while (I != tv[MBB].end() && *I < max_dom) {
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(vreg),
UE = MachineRegisterInfo::use_end(); UI != UE; ++UI) {
MachineBasicBlock* UseMBB = UI->getParent();
if (rv[rev_preorder[*I]].test(preorder[UseMBB]))
return true;
unsigned t_dom = 0;
for (df_iterator<MachineDomTreeNode*> DI =
df_begin(MDT[rev_preorder[*I]]), DE = df_end(MDT[rev_preorder[*I]]);
DI != DE; ++DI)
if (preorder[DI->getBlock()] > t_dom) {
t_dom = preorder[(*DI)->getBlock()];
}
I = tv[MBB].begin();
while (I != tv[MBB].end() && *I < t_dom) ++I;
}
}
return false;
}

View File

@ -214,26 +214,33 @@ void RegScavenger::forward() {
}
// Process uses first.
BitVector UseRegs(NumPhysRegs);
BitVector KillRegs(NumPhysRegs);
for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) {
const MachineOperand MO = *UseMOs[i].first;
unsigned Reg = MO.getReg();
assert(isUsed(Reg) && "Using an undefined register!");
if (MO.isKill() && !isReserved(Reg)) {
UseRegs.set(Reg);
// Kills of implicit_def defined registers are ignored, e.g.
// entry: 0x2029ab8, LLVM BB @0x1b06080, ID#0:
// Live Ins: %R0
// %R0<def> = IMPLICIT_DEF
// %R0<def> = IMPLICIT_DEF
// STR %R0<kill>, %R0, %reg0, 0, 14, %reg0, Mem:ST(4,4) [0x1b06510 + 0]
// %R1<def> = LDR %R0, %reg0, 24, 14, %reg0, Mem:LD(4,4) [0x1b065bc + 0]
if (MO.isKill() && !isReserved(Reg) && !isImplicitlyDefined(Reg)) {
KillRegs.set(Reg);
// Mark sub-registers as used.
for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
UseRegs.set(SubReg);
KillRegs.set(SubReg);
}
}
// Change states of all registers after all the uses are processed to guard
// against multiple uses.
setUnused(UseRegs);
setUnused(KillRegs);
// Process early clobber defs then process defs. We can have an early clobber
// that is dead; it should not conflict with a def that happens one "slot"

View File

@ -563,6 +563,11 @@ void *JIT::getPointerToFunction(Function *F) {
return Addr; // Check if function already code gen'd
MutexGuard locked(lock);
// Now that this thread owns the lock, check if another thread has already
// code gen'd the function.
if (void *Addr = getPointerToGlobalIfAvailable(F))
return Addr;
// Make sure we read in the function if it exists in this Module.
if (F->hasNotBeenReadFromBitcode()) {
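The added re-check above is the classic check/lock/re-check shape. A generic sketch of that pattern, with std::atomic and std::mutex standing in for the JIT's own lock and global map (assumptions for illustration):

#include <atomic>
#include <mutex>

struct LazyCodeGen {
  std::atomic<void*> Addr;
  std::mutex Lock;
  LazyCodeGen() : Addr(nullptr) {}

  void *get(void *(*codeGen)()) {
    if (void *A = Addr.load(std::memory_order_acquire))
      return A;                      // fast path: already code gen'd
    std::lock_guard<std::mutex> G(Lock);
    if (void *A = Addr.load(std::memory_order_relaxed))
      return A;                      // another thread won the race
    void *A = codeGen();             // generate exactly once
    Addr.store(A, std::memory_order_release);
    return A;
  }
};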

View File

@ -98,12 +98,12 @@ FunctionPass *createARMCodePrinterPass(raw_ostream &O,
FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
MachineCodeEmitter &MCE);
FunctionPass *createARMCodeEmitterPass( ARMTargetMachine &TM,
MachineCodeEmitter &MCE);
FunctionPass *createARMJITCodeEmitterPass( ARMTargetMachine &TM,
JITCodeEmitter &JCE);
FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
MachineCodeEmitter &MCE);
FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM,
JITCodeEmitter &JCE);
FunctionPass *createARMLoadStoreOptimizationPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMConstantIslandPass();
} // end namespace llvm;

View File

@ -28,6 +28,8 @@ def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
"ARM v5TE, v5TEj, v5TExp">;
def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
"ARM v6">;
def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
"ARM v6t2">;
def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
"ARM v7A">;
def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
@ -92,9 +94,11 @@ def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>;
def : Proc<"mpcorenovfp", [ArchV6]>;
def : Proc<"mpcore", [ArchV6, FeatureVFP2]>;
def : Proc<"arm1156t2-s", [ArchV6, FeatureThumb2]>;
def : Proc<"arm1156t2f-s", [ArchV6, FeatureThumb2, FeatureVFP2]>;
// V6T2 Processors.
def : Proc<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>;
def : Proc<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
// V7 Processors.
def : Proc<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>;
def : Proc<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>;

View File

@ -17,6 +17,11 @@ class CCIfSubtarget<string F, CCAction A>:
class CCIfAlign<string Align, CCAction A>:
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
/// CCIfFloatABI - Match the current float ABI type. ABIType may be "Hard"
/// or "Soft".
class CCIfFloatABI<string ABIType, CCAction A>:
CCIf<!strconcat("llvm::FloatABIType == llvm::FloatABI::", ABIType), A>;
//===----------------------------------------------------------------------===//
// ARM APCS Calling Convention
//===----------------------------------------------------------------------===//
@ -43,9 +48,10 @@ def RetCC_ARM_APCS : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
// ARM AAPCS (EABI) Calling Convention
// ARM AAPCS (EABI) Calling Convention, common parts
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS : CallingConv<[
def CC_ARM_AAPCS_Common : CallingConv<[
CCIfType<[i8, i16], CCPromoteToType<i32>>,
@ -53,35 +59,71 @@ def CC_ARM_AAPCS : CallingConv<[
// i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
// (and the same is true for f64 if VFP is not enabled)
CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
"ArgFlags.getOrigAlign() != 8",
CCAssignToReg<[R0, R1, R2, R3]>>>,
CCIfType<[i32], CCAssignToStack<4, 4>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[f64], CCAssignToStack<8, 8>>
]>;
def RetCC_ARM_AAPCS : CallingConv<[
CCIfType<[f32], CCBitConvertToType<i32>>,
CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
def RetCC_ARM_AAPCS_Common : CallingConv<[
CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
]>;
//===----------------------------------------------------------------------===//
// ARM AAPCS (EABI) Calling Convention
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS : CallingConv<[
CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
CCDelegateTo<CC_ARM_AAPCS_Common>
]>;
def RetCC_ARM_AAPCS : CallingConv<[
CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
//===----------------------------------------------------------------------===//
// ARM AAPCS-VFP (EABI) Calling Convention
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS_VFP : CallingConv<[
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15]>>,
CCDelegateTo<CC_ARM_AAPCS_Common>
]>;
def RetCC_ARM_AAPCS_VFP : CallingConv<[
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15]>>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
//===----------------------------------------------------------------------===//
// ARM Calling Convention Dispatch
//===----------------------------------------------------------------------===//
def CC_ARM : CallingConv<[
CCIfSubtarget<"isAAPCS_ABI()",
CCIfSubtarget<"hasVFP2()",
CCIfFloatABI<"Hard",
CCDelegateTo<CC_ARM_AAPCS_VFP>>>>,
CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>,
CCDelegateTo<CC_ARM_APCS>
]>;
def RetCC_ARM : CallingConv<[
CCIfSubtarget<"isAAPCS_ABI()",
CCIfSubtarget<"hasVFP2()",
CCIfFloatABI<"Hard",
CCDelegateTo<RetCC_ARM_AAPCS_VFP>>>>,
CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>,
CCDelegateTo<RetCC_ARM_APCS>
]>;

View File

@ -1101,7 +1101,12 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
else
RC = ARM::GPRRegisterClass;
if (RegVT == MVT::f64) {
if (FloatABIType == FloatABI::Hard) {
if (RegVT == MVT::f32)
RC = ARM::SPRRegisterClass;
else if (RegVT == MVT::f64)
RC = ARM::DPRRegisterClass;
} else if (RegVT == MVT::f64) {
// f64 is passed in pairs of GPRs and must be combined.
RegVT = MVT::i32;
} else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32)))

View File

@ -451,7 +451,7 @@ multiclass AsXI1_bin_c_irs<bits<4> opcod, string opc, PatFrag opnode> {
/// the function. The first operand is the ID# for this instruction, the second
/// is the index into the MachineConstantPool that this is, the third is the
/// size in bytes of this constant pool entry.
let isNotDuplicable = 1 in
let neverHasSideEffects = 1, isNotDuplicable = 1 in
def CONSTPOOL_ENTRY :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size),
@ -771,6 +771,7 @@ def STM : AXI4st<(outs),
// Move Instructions.
//
let neverHasSideEffects = 1 in
def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm,
"mov", " $dst, $src", []>, UnaryDP;
def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
@ -946,6 +947,7 @@ def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
// Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in {
def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b),
"smull", " $ldst, $hdst, $a, $b", []>;
@ -967,6 +969,7 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b),
"umaal", " $ldst, $hdst, $a, $b", []>,
Requires<[IsARM, HasV6]>;
} // neverHasSideEffects
// Most significant word multiply
def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),

View File

@ -298,6 +298,7 @@ def tADDrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"add $dst, $lhs, $rhs",
[(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>;
let neverHasSideEffects = 1 in
def tADDhirr : TIt<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs),
"add $dst, $rhs @ addhirr", []>;
@ -387,6 +388,7 @@ def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src),
// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
// which is MOV(3). This also supports high registers.
let neverHasSideEffects = 1 in {
def tMOVr : TI<(outs tGPR:$dst), (ins tGPR:$src),
"cpy $dst, $src", []>;
def tMOVhir2lor : TI<(outs tGPR:$dst), (ins GPR:$src),
@ -395,6 +397,7 @@ def tMOVlor2hir : TI<(outs GPR:$dst), (ins tGPR:$src),
"cpy $dst, $src\t@ lor2hir", []>;
def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src),
"cpy $dst, $src\t@ hir2hir", []>;
} // neverHasSideEffects
def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"mul $dst, $rhs",

View File

@ -192,11 +192,13 @@ def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
let Inst{7-4} = 0b1100;
}
let neverHasSideEffects = 1 in {
def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
"fcpyd", " $dst, $a", []>;
def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
"fcpys", " $dst, $a", []>;
} // neverHasSideEffects
def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
"fnegd", " $dst, $a",

View File

@ -17,24 +17,31 @@
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
@ -81,12 +88,6 @@ namespace {
char ARMLoadStoreOpt::ID = 0;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
return new ARMLoadStoreOpt();
}
static int getLoadStoreMultipleOpcode(int Opcode) {
switch (Opcode) {
case ARM::LDR:
@ -582,6 +583,23 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
int Opcode = MI->getOpcode();
bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
unsigned NumOperands = MI->getDesc().getNumOperands();
unsigned OffField = MI->getOperand(NumOperands-3).getImm();
int Offset = isAM2
? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
if (isAM2) {
if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
Offset = -Offset;
} else {
if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
Offset = -Offset;
}
return Offset;
}
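getMemoryOpOffset above unpacks a magnitude-plus-sign immediate, with AM5 (coprocessor) offsets additionally scaled by 4. A simplified stand-in using a made-up packing, bit 8 as the subtract flag and the low 8 bits as the magnitude (a hypothetical encoding for illustration, not ARM_AM's real one):

#include <cassert>

int decodeOffset(unsigned OffField, bool isAM2) {
  int Offset = (int)(OffField & 0xFF);      // magnitude
  if (!isAM2)
    Offset *= 4;                            // AM5 counts 32-bit words
  if (OffField & 0x100)                     // subtract flag
    Offset = -Offset;
  return Offset;
}

int main() {
  assert(decodeOffset(0x004, true)  ==  4);
  assert(decodeOffset(0x104, true)  == -4);
  assert(decodeOffset(0x004, false) == 16);
  return 0;
}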
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
@ -606,22 +624,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
bool isMemOp = isMemoryOp(MBBI);
if (isMemOp) {
int Opcode = MBBI->getOpcode();
bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
unsigned Size = getLSMultipleTransferSize(MBBI);
unsigned Base = MBBI->getOperand(1).getReg();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
unsigned NumOperands = MBBI->getDesc().getNumOperands();
unsigned OffField = MBBI->getOperand(NumOperands-3).getImm();
int Offset = isAM2
? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
if (isAM2) {
if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
Offset = -Offset;
} else {
if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
Offset = -Offset;
}
int Offset = getMemoryOpOffset(MBBI);
// Watch out for:
// r4 := ldr [r5]
// r5 := ldr [r5, #4]
@ -744,6 +751,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
return NumMerges > 0;
}
namespace {
struct OffsetCompare {
bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
int LOffset = getMemoryOpOffset(LHS);
int ROffset = getMemoryOpOffset(RHS);
assert(LHS == RHS || LOffset != ROffset);
return LOffset > ROffset;
}
};
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
@ -788,3 +806,277 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
delete RS;
return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
namespace {
struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
virtual bool runOnMachineFunction(MachineFunction &Fn);
virtual const char *getPassName() const {
return "ARM pre- register allocation load / store optimization pass";
}
private:
bool RescheduleOps(MachineBasicBlock *MBB,
SmallVector<MachineInstr*, 4> &Ops,
unsigned Base, bool isLd,
DenseMap<MachineInstr*, unsigned> &MI2LocMap);
bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
};
char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
TII = Fn.getTarget().getInstrInfo();
TRI = Fn.getTarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI)
Modified |= RescheduleLoadStoreInstrs(MFI);
return Modified;
}
static bool IsSafeToMove(bool isLd, unsigned Base,
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E,
SmallPtrSet<MachineInstr*, 4> MoveOps,
const TargetRegisterInfo *TRI) {
// Are there stores / loads / calls between them?
// FIXME: This is overly conservative. We should make use of alias information
// some day.
while (++I != E) {
const TargetInstrDesc &TID = I->getDesc();
if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
return false;
if (isLd && TID.mayStore())
return false;
if (!isLd) {
if (TID.mayLoad())
return false;
// It's not safe to move the first 'str' down.
// str r1, [r0]
// strh r5, [r0]
// str r4, [r0, #+4]
if (TID.mayStore() && !MoveOps.count(&*I))
return false;
}
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
MachineOperand &MO = I->getOperand(j);
if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base))
return false;
}
}
return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
SmallVector<MachineInstr*, 4> &Ops,
unsigned Base, bool isLd,
DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
bool RetVal = false;
// Sort by offset (in reverse order).
std::sort(Ops.begin(), Ops.end(), OffsetCompare());
// The loads / stores of the same base are in order. Scan them from first to
// last and check for the following:
// 1. Any def of base.
// 2. Any gaps.
while (Ops.size() > 1) {
unsigned FirstLoc = ~0U;
unsigned LastLoc = 0;
MachineInstr *FirstOp = 0;
MachineInstr *LastOp = 0;
int LastOffset = 0;
unsigned LastBytes = 0;
unsigned NumMove = 0;
for (int i = Ops.size() - 1; i >= 0; --i) {
MachineInstr *Op = Ops[i];
unsigned Loc = MI2LocMap[Op];
if (Loc <= FirstLoc) {
FirstLoc = Loc;
FirstOp = Op;
}
if (Loc >= LastLoc) {
LastLoc = Loc;
LastOp = Op;
}
int Offset = getMemoryOpOffset(Op);
unsigned Bytes = getLSMultipleTransferSize(Op);
if (LastBytes) {
if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
break;
}
LastOffset = Offset;
LastBytes = Bytes;
if (++NumMove == 4)
break;
}
if (NumMove <= 1)
Ops.pop_back();
else {
SmallPtrSet<MachineInstr*, 4> MoveOps;
for (int i = NumMove-1; i >= 0; --i)
MoveOps.insert(Ops[i]);
// Be conservative: if the instructions are too far apart, don't
// move them. We want to limit the increase of register pressure.
bool DoMove = (LastLoc - FirstLoc) < NumMove*4;
if (DoMove)
DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI);
if (!DoMove) {
for (unsigned i = 0; i != NumMove; ++i)
Ops.pop_back();
} else {
// This is the new location for the loads / stores.
MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
while (InsertPos != MBB->end() && MoveOps.count(InsertPos))
++InsertPos;
for (unsigned i = 0; i != NumMove; ++i) {
MachineInstr *Op = Ops.back();
Ops.pop_back();
MBB->splice(InsertPos, MBB, Op);
}
NumLdStMoved += NumMove;
RetVal = true;
}
}
}
return RetVal;
}
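RescheduleOps above only keeps ops while each offset extends the previous one by exactly the access size (and stops after four). A toy run detector over sorted offsets, with made-up values for illustration:

#include <algorithm>
#include <cassert>
#include <vector>

unsigned longestRun(const std::vector<int> &Offsets, int Bytes) {
  unsigned Run = 1, Best = 1;
  for (size_t i = 1; i < Offsets.size(); ++i) {
    if (Offsets[i] == Offsets[i-1] + Bytes)
      Best = std::max(Best, ++Run); // still consecutive
    else
      Run = 1;                      // gap: start a new run
  }
  return Best;
}

int main() {
  int Offs[] = {0, 4, 8, 16};       // gap before #16 breaks the run
  std::vector<int> V(Offs, Offs + 4);
  assert(longestRun(V, 4) == 3);
  return 0;
}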
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
bool RetVal = false;
DenseMap<MachineInstr*, unsigned> MI2LocMap;
DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
SmallVector<unsigned, 4> LdBases;
SmallVector<unsigned, 4> StBases;
unsigned Loc = 0;
MachineBasicBlock::iterator MBBI = MBB->begin();
MachineBasicBlock::iterator E = MBB->end();
while (MBBI != E) {
for (; MBBI != E; ++MBBI) {
MachineInstr *MI = MBBI;
const TargetInstrDesc &TID = MI->getDesc();
if (TID.isCall() || TID.isTerminator()) {
// Stop at barriers.
++MBBI;
break;
}
MI2LocMap[MI] = Loc++;
if (!isMemoryOp(MI))
continue;
unsigned PredReg = 0;
if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
continue;
int Opcode = MI->getOpcode();
bool isLd = Opcode == ARM::LDR ||
Opcode == ARM::FLDS || Opcode == ARM::FLDD;
unsigned Base = MI->getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);
bool StopHere = false;
if (isLd) {
DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
Base2LdsMap.find(Base);
if (BI != Base2LdsMap.end()) {
for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
if (Offset == getMemoryOpOffset(BI->second[i])) {
StopHere = true;
break;
}
}
if (!StopHere)
BI->second.push_back(MI);
} else {
SmallVector<MachineInstr*, 4> MIs;
MIs.push_back(MI);
Base2LdsMap[Base] = MIs;
LdBases.push_back(Base);
}
} else {
DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
Base2StsMap.find(Base);
if (BI != Base2StsMap.end()) {
for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
if (Offset == getMemoryOpOffset(BI->second[i])) {
StopHere = true;
break;
}
}
if (!StopHere)
BI->second.push_back(MI);
} else {
SmallVector<MachineInstr*, 4> MIs;
MIs.push_back(MI);
Base2StsMap[Base] = MIs;
StBases.push_back(Base);
}
}
if (StopHere) {
// Found a duplicate (a base+offset combination that's seen earlier). Backtrack.
--Loc;
break;
}
}
// Re-schedule loads.
for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
unsigned Base = LdBases[i];
SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
if (Lds.size() > 1)
RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
}
// Re-schedule stores.
for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
unsigned Base = StBases[i];
SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
if (Sts.size() > 1)
RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
}
if (MBBI != E) {
Base2LdsMap.clear();
Base2StsMap.clear();
LdBases.clear();
StBases.clear();
}
}
return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
if (PreAlloc)
return new ARMPreAllocLoadStoreOpt();
return new ARMLoadStoreOpt();
}

View File

@ -219,3 +219,18 @@ def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8,
// Condition code registers.
def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
//===----------------------------------------------------------------------===//
// Subregister Set Definitions... now that we have all of the pieces, define the
// sub registers for each register.
//
def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7,
D8, D9, D10, D11, D12, D13, D14, D15],
[S0, S2, S4, S6, S8, S10, S12, S14,
S16, S18, S20, S22, S24, S26, S28, S30]>;
def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7,
D8, D9, D10, D11, D12, D13, D14, D15],
[S1, S3, S5, S7, S9, S11, S13, S15,
S17, S19, S21, S23, S25, S27, S29, S31]>;

View File

@ -14,6 +14,8 @@
#include "ARMSubtarget.h"
#include "ARMGenSubtarget.inc"
#include "llvm/Module.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
@ -28,6 +30,10 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
, CPUString("generic")
, TargetType(isELF) // Default to ELF unless otherwise specified.
, TargetABI(ARM_ABI_APCS) {
// default to soft float ABI
if (FloatABIType == FloatABI::Default)
FloatABIType = FloatABI::Soft;
// Determine default and user specified characteristics
// Parse features string.

View File

@ -23,7 +23,7 @@ class Module;
class ARMSubtarget : public TargetSubtarget {
protected:
enum ARMArchEnum {
V4T, V5T, V5TE, V6, V7A
V4T, V5T, V5TE, V6, V6T2, V7A
};
enum ARMFPEnum {
@ -92,6 +92,7 @@ class ARMSubtarget : public TargetSubtarget {
bool hasV5TOps() const { return ARMArchVersion >= V5T; }
bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
bool hasV6Ops() const { return ARMArchVersion >= V6; }
bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
bool hasV7Ops() const { return ARMArchVersion >= V7A; }
bool hasVFP2() const { return ARMFPUType >= VFPv2; }
@ -105,6 +106,7 @@ class ARMSubtarget : public TargetSubtarget {
bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
bool isThumb() const { return IsThumb; }
bool isThumb1() const { return IsThumb && (ThumbMode == Thumb1); }
bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
bool useThumbBacktraces() const { return UseThumbBacktraces; }

View File

@ -23,6 +23,9 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static cl::opt<bool>
EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden,
cl::desc("Enable pre-regalloc load store optimization pass"));
static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
cl::desc("Disable load store optimization pass"));
static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden,
@ -144,6 +147,16 @@ bool ARMTargetMachine::addInstSelector(PassManagerBase &PM,
return false;
}
bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
if (!EnablePreLdStOpti)
return false;
// FIXME: temporarily disabling load / store optimization pass for Thumb mode.
if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
PM.add(createARMLoadStoreOptimizationPass(true));
return true;
}
bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// FIXME: temporarily disabling load / store optimization pass for Thumb mode.

View File

@ -71,6 +71,7 @@ class ARMTargetMachine : public LLVMTargetMachine {
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,

View File

@ -5,6 +5,7 @@ add_llvm_library(LLVMTarget
Target.cpp
TargetAsmInfo.cpp
TargetData.cpp
TargetELFWriterInfo.cpp
TargetFrameInfo.cpp
TargetInstrInfo.cpp
TargetMachOWriterInfo.cpp
@ -14,4 +15,4 @@ add_llvm_library(LLVMTarget
TargetSubtarget.cpp
)
# TODO: Support other targets besides X86. See Makefile.

View File

@ -33,8 +33,9 @@ bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
return true;
}
/// runOnMachineFunction - This uses the printInstruction()
/// method to print assembly for each instruction.
/// runOnMachineFunction - This emits the frame section, autos section and
/// assembly for each instruction. Also takes care of function begin debug
/// directive and file begin debug directive (if required) for the function.
///
bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
@ -47,20 +48,38 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const Function *F = MF.getFunction();
CurrentFnName = Mang->getValueName(F);
DbgInfo.EmitFileDirective(F);
// Emit the function variables.
// Iterate over the first basic block's instructions to find one that has a
// DebugLoc; if found, emit the .file directive. Instructions such as movlw do
// not have a valid DebugLoc, so we need to iterate over the instructions.
MachineFunction::const_iterator I = MF.begin();
for (MachineBasicBlock::const_iterator MBBI = I->begin(), E = I->end();
MBBI != E; MBBI++) {
const DebugLoc DLoc = MBBI->getDebugLoc();
if (!DLoc.isUnknown()) {
GlobalVariable *CU = MF.getDebugLocTuple(DLoc).CompileUnit;
unsigned line = MF.getDebugLocTuple(DLoc).Line;
DbgInfo.EmitFileDirective(CU);
DbgInfo.SetFunctBeginLine(line);
break;
}
}
// Emit the function frame (args and temps).
EmitFunctionFrame(MF);
// Emit function begin debug directives
// Emit function begin debug directive.
DbgInfo.EmitFunctBeginDI(F);
// Emit the autos section of function.
EmitAutos(CurrentFnName);
// Now emit the instructions of function in its code section.
const char *codeSection = PAN::getCodeSectionName(CurrentFnName).c_str();
const Section *fCodeSection = TAI->getNamedSection(codeSection,
SectionFlags::Code);
O << "\n";
// Start the Code Section.
O << "\n";
SwitchToSection (fCodeSection);
// Emit the frame address of the function at the beginning of code.
@ -77,14 +96,17 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Print out code for the function.
for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
I != E; ++I) {
// Print a label for the basic block.
if (I != MF.begin()) {
printBasicBlockLabel(I, true);
O << '\n';
}
// Print a basic block.
for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
II != E; ++II) {
// Emit the line directive if source line changed.
const DebugLoc DL = II->getDebugLoc();
if (!DL.isUnknown()) {
@ -102,6 +124,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit function end debug directives.
DbgInfo.EmitFunctEndDI(F, CurLine);
return false; // we didn't modify anything.
}
@ -158,11 +181,16 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
}
}
/// printCCOperand - Print the cond code operand.
///
void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
int CC = (int)MI->getOperand(opNum).getImm();
O << PIC16CondCodeToString((PIC16CC::CondCodes)CC);
}
/// printLibcallDecls - print the extern declarations for compiler
/// intrinsics.
///
void PIC16AsmPrinter::printLibcallDecls(void) {
// If no libcalls used, return.
if (LibcallDecls.empty()) return;
@ -180,6 +208,10 @@ void PIC16AsmPrinter::printLibcallDecls(void) {
O << TAI->getCommentString() << "External decls for libcalls - END." <<"\n";
}
/// doInitialization - Perform Module level initializations here.
/// One task that we do here is to sectionize all global variables.
/// The MemSelOptimizer pass depends on the sectionizing.
///
bool PIC16AsmPrinter::doInitialization (Module &M) {
bool Result = AsmPrinter::doInitialization(M);
@ -194,23 +226,23 @@ bool PIC16AsmPrinter::doInitialization (Module &M) {
I->setSection(TAI->SectionForGlobal(I)->getName());
}
DbgInfo.EmitFileDirective(M);
DbgInfo.Init(M);
EmitFunctionDecls(M);
EmitUndefinedVars(M);
EmitDefinedVars(M);
EmitIData(M);
EmitUData(M);
EmitRomData(M);
DbgInfo.PopulateFunctsDI(M);
return Result;
}
// Emit extern decls for functions imported from other modules, and emit
// global declarations for function defined in this module and which are
// available to other modules.
/// Emit extern decls for functions imported from other modules, and emit
/// global declarations for functions defined in this module which are
/// available to other modules.
///
void PIC16AsmPrinter::EmitFunctionDecls (Module &M) {
// Emit declarations for external functions.
O << TAI->getCommentString() << "Function Declarations - BEGIN." <<"\n";
O <<"\n"<<TAI->getCommentString() << "Function Declarations - BEGIN." <<"\n";
for (Module::iterator I = M.begin(), E = M.end(); I != E; I++) {
std::string Name = Mang->getValueName(I);
if (Name.compare("@abort") == 0)
@ -280,6 +312,7 @@ void PIC16AsmPrinter::EmitRomData (Module &M)
bool PIC16AsmPrinter::doFinalization(Module &M) {
printLibcallDecls();
EmitRemainingAutos();
DbgInfo.EmitVarDebugInfo(M);
DbgInfo.EmitEOF();
O << "\n\t" << "END\n";
@ -383,6 +416,8 @@ void PIC16AsmPrinter::EmitAutos (std::string FunctName)
for (unsigned i = 0; i < AutosSections.size(); i++) {
O << "\n";
if (AutosSections[i]->S_->getName() == SectionName) {
// Set the printing status to true
AutosSections[i]->setPrintedStatus(true);
SwitchToSection(AutosSections[i]->S_);
std::vector<const GlobalVariable*> Items = AutosSections[i]->Items;
for (unsigned j = 0; j < Items.size(); j++) {
@ -398,3 +433,34 @@ void PIC16AsmPrinter::EmitAutos (std::string FunctName)
}
}
// Print autos that were not printed during the code printing of functions,
// as the functions themselves might have been deleted by the optimizer.
void PIC16AsmPrinter::EmitRemainingAutos()
{
const TargetData *TD = TM.getTargetData();
// Now print Autos section for this function.
std::vector <PIC16Section *>AutosSections = PTAI->AutosSections;
for (unsigned i = 0; i < AutosSections.size(); i++) {
// if the section is already printed then don't print again
if (AutosSections[i]->isPrinted())
continue;
// Set status as printed
AutosSections[i]->setPrintedStatus(true);
O << "\n";
SwitchToSection(AutosSections[i]->S_);
std::vector<const GlobalVariable*> Items = AutosSections[i]->Items;
for (unsigned j = 0; j < Items.size(); j++) {
std::string VarName = Mang->getValueName(Items[j]);
Constant *C = Items[j]->getInitializer();
const Type *Ty = C->getType();
unsigned Size = TD->getTypeAllocSize(Ty);
// Emit memory reserve directive.
O << VarName << " RES " << Size << "\n";
}
}
}

View File

@ -52,6 +52,7 @@ namespace llvm {
void EmitIData (Module &M);
void EmitUData (Module &M);
void EmitAutos (std::string FunctName);
void EmitRemainingAutos ();
void EmitRomData (Module &M);
void EmitFunctionFrame(MachineFunction &MF);
void printLibcallDecls(void);

View File

@ -18,13 +18,6 @@
using namespace llvm;
PIC16DbgInfo::~PIC16DbgInfo() {
for(std::map<std::string, DISubprogram *>::iterator i = FunctNameMap.begin();
i!=FunctNameMap.end(); i++)
delete i->second;
FunctNameMap.clear();
}
void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
bool &HasAux, int Aux[],
std::string &TypeName) {
@ -70,7 +63,7 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
}
HasAux = true;
// In the auxiliary entry for an array, the 7th and 8th bytes represent the array size.
Aux[6] = size;
Aux[6] = size & 0xff;
Aux[7] = size >> 8;
DIType BaseType = CTy.getTypeDerivedFrom();
PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName);
@ -86,10 +79,14 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
else
TypeNo = TypeNo | PIC16Dbg::T_UNION;
CTy.getName(TypeName);
unsigned size = CTy.getSizeInBits()/8;
// UniqueSuffix is .number where number is obtained from
// llvm.dbg.composite<number>.
std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18);
TypeName += UniqueSuffix;
unsigned short size = CTy.getSizeInBits()/8;
// The 7th and 8th bytes represent the size.
HasAux = true;
Aux[6] = size;
Aux[6] = size & 0xff;
Aux[7] = size >> 8;
break;
}
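
A small sketch of the naming and size encoding used above (the global name
here is hypothetical): the "llvm.dbg.composite" prefix is 18 characters long,
so substr(18) extracts the trailing number, and the 16-bit size is split
little-endian across the 7th and 8th auxiliary bytes.

#include <cassert>
#include <string>

int main() {
  std::string DIVar = "llvm.dbg.composite42";      // hypothetical global name
  std::string UniqueSuffix = "." + DIVar.substr(18);
  assert(UniqueSuffix == ".42");   // strlen("llvm.dbg.composite") == 18

  unsigned short size = 300;       // e.g. a 300-byte struct
  int Aux6 = size & 0xff;          // low byte
  int Aux7 = size >> 8;            // high byte
  assert(Aux6 == 44 && Aux7 == 1); // 300 == 1*256 + 44
  return 0;
}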
@ -145,37 +142,84 @@ short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) {
return ClassNo;
}
void PIC16DbgInfo::PopulateFunctsDI(Module &M) {
GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.subprograms");
if (!Root)
return;
Constant *RootC = cast<Constant>(*Root->use_begin());
for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end();
UI != UE; ++UI)
for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
UUI != UUE; ++UUI) {
GlobalVariable *GVSP = cast<GlobalVariable>(*UUI);
DISubprogram *SP = new DISubprogram(GVSP);
std::string Name;
SP->getLinkageName(Name);
FunctNameMap[Name] = SP;
}
return;
void PIC16DbgInfo::Init(Module &M) {
// Do all debug related initializations here.
EmitFileDirective(M);
EmitCompositeTypeDecls(M);
}
DISubprogram* PIC16DbgInfo::getFunctDI(std::string FunctName) {
return FunctNameMap[FunctName];
void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
for(iplist<GlobalVariable>::iterator I = M.getGlobalList().begin(),
E = M.getGlobalList().end(); I != E; I++) {
// Structure and union declarations' debug info has llvm.dbg.composite
// in its name.
if(I->getName().find("llvm.dbg.composite") != std::string::npos) {
GlobalVariable *GV = cast<GlobalVariable >(I);
DICompositeType CTy(GV);
if (CTy.getTag() == dwarf::DW_TAG_union_type ||
CTy.getTag() == dwarf::DW_TAG_structure_type ) {
std::string name;
CTy.getName(name);
std::string DIVar = I->getName();
// Get the number after llvm.dbg.composite and make UniqueSuffix from
// it.
std::string UniqueSuffix = "." + DIVar.substr(18);
std::string MangledCTyName = name + UniqueSuffix;
unsigned short size = CTy.getSizeInBits()/8;
int Aux[PIC16Dbg::AuxSize] = {0};
// The 7th and 8th bytes represent the size of the structure/union.
Aux[6] = size & 0xff;
Aux[7] = size >> 8;
// Emit .def for structure/union tag.
if( CTy.getTag() == dwarf::DW_TAG_union_type)
EmitSymbol(MangledCTyName, PIC16Dbg::C_UNTAG);
else if (CTy.getTag() == dwarf::DW_TAG_structure_type)
EmitSymbol(MangledCTyName, PIC16Dbg::C_STRTAG);
// Emit auxiliary debug information for structure/union tag.
EmitAuxEntry(MangledCTyName, Aux, PIC16Dbg::AuxSize);
unsigned long Value = 0;
DIArray Elements = CTy.getTypeArray();
for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) {
DIDescriptor Element = Elements.getElement(i);
unsigned short TypeNo = 0;
bool HasAux = false;
int ElementAux[PIC16Dbg::AuxSize] = { 0 };
std::string TypeName = "";
std::string ElementName;
GlobalVariable *GV = Element.getGV();
DIDerivedType DITy(GV);
DITy.getName(ElementName);
unsigned short ElementSize = DITy.getSizeInBits()/8;
// Get the mangled name for this structure/union element.
std::string MangMemName = ElementName + UniqueSuffix;
PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TypeName);
short Class;
if( CTy.getTag() == dwarf::DW_TAG_union_type)
Class = PIC16Dbg::C_MOU;
else if (CTy.getTag() == dwarf::DW_TAG_structure_type)
Class = PIC16Dbg::C_MOS;
EmitSymbol(MangMemName, Class, TypeNo, Value);
if (CTy.getTag() == dwarf::DW_TAG_structure_type)
Value += ElementSize;
if (HasAux)
EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TypeName);
}
// Emit mangled Symbol for end of structure/union.
std::string EOSSymbol = ".eos" + UniqueSuffix;
EmitSymbol(EOSSymbol, PIC16Dbg::C_EOS);
EmitAuxEntry(EOSSymbol, Aux, PIC16Dbg::AuxSize, MangledCTyName);
}
}
}
}
void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
std::string FunctName = F->getName();
DISubprogram *SP = getFunctDI(FunctName);
if (SP) {
if (EmitDebugDirectives) {
std::string FunctBeginSym = ".bf." + FunctName;
std::string BlockBeginSym = ".bb." + FunctName;
int FunctBeginLine = SP->getLineNumber();
int BFAux[PIC16Dbg::AuxSize] = {0};
BFAux[4] = FunctBeginLine;
BFAux[5] = FunctBeginLine >> 8;
@ -189,8 +233,7 @@ void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
std::string FunctName = F->getName();
DISubprogram *SP = getFunctDI(FunctName);
if (SP) {
if (EmitDebugDirectives) {
std::string FunctEndSym = ".ef." + FunctName;
std::string BlockEndSym = ".eb." + FunctName;
@ -208,14 +251,21 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
/// EmitAuxEntry - Emit Auxiliary debug information.
///
void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num) {
void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num,
std::string tag) {
O << "\n\t.dim " << VarName << ", 1" ;
if (tag != "")
O << ", " << tag;
for (int i = 0; i<num; i++)
O << "," << Aux[i];
}
void PIC16DbgInfo::EmitSymbol(std::string Name, int Class) {
O << "\n\t" << ".def "<< Name << ", debug, class = " << Class;
void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
Type, unsigned long Value) {
O << "\n\t" << ".def "<< Name << ", type = " << Type << ", class = "
<< Class;
if (Value > 0)
O << ", value = " << Value;
}
void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
@ -241,18 +291,8 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
O << "\n\t.type " << VarName << ", " << TypeNo;
short ClassNo = getClass(DIGV);
O << "\n\t.class " << VarName << ", " << ClassNo;
if (HasAux) {
if (TypeName != "") {
// Emit debug info for structure and union objects after
// .dim directive supports structure/union tag name in aux entry.
/* O << "\n\t.dim " << VarName << ", 1," << TypeName;
for (int i = 0; i<PIC16Dbg::AuxSize; i++)
O << "," << Aux[i];*/
}
else {
EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize);
}
}
if (HasAux)
EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TypeName);
}
}
}
@ -262,26 +302,20 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
void PIC16DbgInfo::EmitFileDirective(Module &M) {
GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
if (CU) {
DICompileUnit DIUnit(CU);
std::string Dir, FN;
std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN);
O << "\n\t.file\t\"" << File << "\"\n" ;
CurFile = File;
EmitDebugDirectives = true;
EmitFileDirective(CU, false);
}
}
void PIC16DbgInfo::EmitFileDirective(const Function *F) {
std::string FunctName = F->getName();
DISubprogram *SP = getFunctDI(FunctName);
if (SP) {
std::string Dir, FN;
DICompileUnit CU = SP->getCompileUnit();
std::string File = CU.getDirectory(Dir) + "/" + CU.getFilename(FN);
if ( File != CurFile) {
void PIC16DbgInfo::EmitFileDirective(GlobalVariable *CU, bool EmitEof) {
std::string Dir, FN;
DICompileUnit DIUnit(CU);
std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN);
if ( File != CurFile ) {
if (EmitEof)
EmitEOF();
O << "\n\t.file\t\"" << File << "\"\n" ;
CurFile = File;
}
O << "\n\t.file\t\"" << File << "\"\n" ;
CurFile = File;
}
}
@ -290,3 +324,6 @@ void PIC16DbgInfo::EmitEOF() {
O << "\n\t.EOF";
}
void PIC16DbgInfo::SetFunctBeginLine(unsigned line) {
FunctBeginLine = line;
}

View File

@ -91,29 +91,36 @@ namespace llvm {
class raw_ostream;
class PIC16DbgInfo {
std::map <std::string, DISubprogram *> FunctNameMap;
raw_ostream &O;
const TargetAsmInfo *TAI;
std::string CurFile;
// EmitDebugDirectives is set if debug information is available. Default
// value for it is false.
bool EmitDebugDirectives;
unsigned FunctBeginLine;
public:
PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) {
CurFile = "";
EmitDebugDirectives = false;
}
~PIC16DbgInfo();
void PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux,
int Aux[], std::string &TypeName);
unsigned GetTypeDebugNumber(std::string &type);
short getClass(DIGlobalVariable DIGV);
void PopulateFunctsDI(Module &M);
DISubprogram *getFunctDI(std::string FunctName);
void EmitFunctBeginDI(const Function *F);
void Init(Module &M);
void EmitCompositeTypeDecls(Module &M);
void EmitFunctEndDI(const Function *F, unsigned Line);
void EmitAuxEntry(const std::string VarName, int Aux[], int num);
inline void EmitSymbol(std::string Name, int Class);
void EmitAuxEntry(const std::string VarName, int Aux[],
int num = PIC16Dbg::AuxSize, std::string tag = "");
inline void EmitSymbol(std::string Name, short Class,
unsigned short Type = PIC16Dbg::T_NULL,
unsigned long Value = 0);
void EmitVarDebugInfo(Module &M);
void EmitFileDirective(Module &M);
void EmitFileDirective(const Function *F);
void EmitFileDirective(GlobalVariable *CU, bool EmitEof = true);
void EmitEOF();
void SetFunctBeginLine(unsigned line);
};
} // end namespace llvm;
#endif

View File

@ -56,6 +56,17 @@ static const char *getIntrinsicName(unsigned opcode) {
case RTLIB::SREM_I32: Basename = "srem.i32"; break;
case RTLIB::UREM_I16: Basename = "urem.i16"; break;
case RTLIB::UREM_I32: Basename = "urem.i32"; break;
case RTLIB::FPTOSINT_F32_I32:
Basename = "f32_to_si32"; break;
case RTLIB::SINTTOFP_I32_F32:
Basename = "si32_to_f32"; break;
case RTLIB::ADD_F32: Basename = "add.f32"; break;
case RTLIB::SUB_F32: Basename = "sub.f32"; break;
case RTLIB::MUL_F32: Basename = "mul.f32"; break;
case RTLIB::DIV_F32: Basename = "div.f32"; break;
}
std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
@ -113,7 +124,17 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
// Unsigned remainder lib call names
setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16));
setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32));
// Floating point operations
setLibcallName(RTLIB::FPTOSINT_F32_I32,
getIntrinsicName(RTLIB::FPTOSINT_F32_I32));
setLibcallName(RTLIB::SINTTOFP_I32_F32,
getIntrinsicName(RTLIB::SINTTOFP_I32_F32));
setLibcallName(RTLIB::ADD_F32, getIntrinsicName(RTLIB::ADD_F32));
setLibcallName(RTLIB::SUB_F32, getIntrinsicName(RTLIB::SUB_F32));
setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32));
setLibcallName(RTLIB::DIV_F32, getIntrinsicName(RTLIB::DIV_F32));
setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);

View File

@ -33,9 +33,13 @@ namespace llvm {
struct PIC16Section {
const Section *S_; // Connection to actual Section.
unsigned Size; // Total size of the objects contained.
bool SectionPrinted;
std::vector<const GlobalVariable*> Items;
PIC16Section (const Section *s) { S_ = s; Size = 0; }
PIC16Section (const Section *s) { S_ = s; Size = 0;
SectionPrinted = false;}
bool isPrinted() { return SectionPrinted ; }
void setPrintedStatus(bool status) { SectionPrinted = status ;}
};
struct PIC16TargetAsmInfo : public TargetAsmInfo {

View File

@ -0,0 +1,36 @@
//===-- lib/Target/TargetELFWriterInfo.cpp - ELF Writer Info ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetELFWriterInfo class.
//
//===----------------------------------------------------------------------===//
#include "llvm/Function.h"
#include "llvm/Target/TargetELFWriterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
TargetELFWriterInfo::TargetELFWriterInfo(TargetMachine &tm) : TM(tm) {
is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
isLittleEndian = TM.getTargetData()->isLittleEndian();
}
TargetELFWriterInfo::~TargetELFWriterInfo() {}
/// getFunctionAlignment - Returns the alignment for function 'F'. Targets
/// with different alignment constraints should overload this method.
unsigned TargetELFWriterInfo::getFunctionAlignment(const Function *F) const {
const TargetData *TD = TM.getTargetData();
unsigned FnAlign = F->getAlignment();
unsigned TDAlign = TD->getPointerABIAlignment();
unsigned Align = std::max(FnAlign, TDAlign);
assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
return Align;
}
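
The assert relies on the usual bit trick: for Align > 0, Align & (Align - 1)
clears the lowest set bit, so the result is zero exactly when a single bit is
set. A tiny standalone check (illustrative only):

#include <cassert>

static bool isPowerOfTwo(unsigned Align) {
  return Align != 0 && (Align & (Align - 1)) == 0;
}

int main() {
  assert(isPowerOfTwo(16));  // 10000b & 01111b == 0
  assert(!isPowerOfTwo(12)); // 1100b & 1011b == 1000b != 0
  return 0;
}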

View File

@ -30,6 +30,7 @@ namespace llvm {
bool FiniteOnlyFPMathOption;
bool HonorSignDependentRoundingFPMathOption;
bool UseSoftFloat;
FloatABI::ABIType FloatABIType;
bool NoImplicitFloat;
bool NoZerosInBSS;
bool ExceptionHandling;
@ -84,6 +85,19 @@ GenerateSoftFloatCalls("soft-float",
cl::desc("Generate software floating point library calls"),
cl::location(UseSoftFloat),
cl::init(false));
static cl::opt<llvm::FloatABI::ABIType, true>
FloatABIForCalls("float-abi",
cl::desc("Choose float ABI type"),
cl::location(FloatABIType),
cl::init(FloatABI::Default),
cl::values(
clEnumValN(FloatABI::Default, "default",
"Target default float ABI type"),
clEnumValN(FloatABI::Soft, "soft",
"Soft float ABI (implied by -soft-float)"),
clEnumValN(FloatABI::Hard, "hard",
"Hard float ABI (uses FP registers)"),
clEnumValEnd));
static cl::opt<bool, true>
DontPlaceZerosInBSS("nozero-initialized-in-bss",
cl::desc("Don't place zero-initialized symbols into bss section"),
@ -162,6 +176,14 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
// TargetMachine Class
//
TargetMachine::TargetMachine()
: AsmInfo(0) {
// Typically it will be subtargets that will adjust FloatABIType from Default
// to Soft or Hard.
if (UseSoftFloat)
FloatABIType = FloatABI::Soft;
}
TargetMachine::~TargetMachine() {
delete AsmInfo;
}
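
Taken together with the subtarget changes elsewhere in this commit, the
precedence is: -soft-float wins, an explicit -float-abi choice comes next,
and a subtarget default (Soft on ARM, Hard on X86) applies only when the
user chose nothing. A condensed standalone sketch of that resolution, with
invented names:

#include <cassert>

enum FloatABIKind { FA_Default, FA_Soft, FA_Hard };

static FloatABIKind resolveFloatABI(bool UseSoftFloat, FloatABIKind CmdLine,
                                    FloatABIKind SubtargetDefault) {
  if (UseSoftFloat) return FA_Soft;          // TargetMachine constructor
  if (CmdLine != FA_Default) return CmdLine; // -float-abi=soft|hard
  return SubtargetDefault;                   // subtarget constructor
}

int main() {
  assert(resolveFloatABI(true,  FA_Hard,    FA_Hard) == FA_Soft);
  assert(resolveFloatABI(false, FA_Hard,    FA_Soft) == FA_Hard);
  assert(resolveFloatABI(false, FA_Default, FA_Soft) == FA_Soft); // ARM
  return 0;
}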

View File

@ -482,35 +482,6 @@ _usesbb:
//===---------------------------------------------------------------------===//
Currently we don't have elimination of redundant stack manipulations. Consider
the code:
int %main() {
entry:
call fastcc void %test1( )
call fastcc void %test2( sbyte* cast (void ()* %test1 to sbyte*) )
ret int 0
}
declare fastcc void %test1()
declare fastcc void %test2(sbyte*)
This currently compiles to:
subl $16, %esp
call _test5
addl $12, %esp
subl $16, %esp
movl $_test5, (%esp)
call _test6
addl $12, %esp
The add/sub pair is really unneeded here.
//===---------------------------------------------------------------------===//
Consider the expansion of:
define i32 @test3(i32 %X) {
@ -902,34 +873,6 @@ condition register is dead. xor reg reg is shorter than mov reg, #0.
//===---------------------------------------------------------------------===//
We aren't matching RMW instructions aggressively
enough. Here's a reduced testcase (more in PR1160):
define void @test(i32* %huge_ptr, i32* %target_ptr) {
%A = load i32* %huge_ptr ; <i32> [#uses=1]
%B = load i32* %target_ptr ; <i32> [#uses=1]
%C = or i32 %A, %B ; <i32> [#uses=1]
store i32 %C, i32* %target_ptr
ret void
}
$ llvm-as < t.ll | llc -march=x86-64
_test:
movl (%rdi), %eax
orl (%rsi), %eax
movl %eax, (%rsi)
ret
That should be something like:
_test:
movl (%rdi), %eax
orl %eax, (%rsi)
ret
//===---------------------------------------------------------------------===//
The following code:
bb114.preheader: ; preds = %cond_next94
@ -1897,3 +1840,60 @@ The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona,
Core 2, and "Generic"
//===---------------------------------------------------------------------===//
Testcase:
int a(int x) { return (x & 127) > 31; }
Current output:
movl 4(%esp), %eax
andl $127, %eax
cmpl $31, %eax
seta %al
movzbl %al, %eax
ret
Ideal output:
xorl %eax, %eax
testl $96, 4(%esp)
setne %al
ret
We could do this transformation in instcombine, but it's only clearly
beneficial on platforms with a test instruction.
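
A quick exhaustive check of the identity (both sides depend only on the low
7 bits, so 0..127 covers every case):

#include <cassert>

int main() {
  // Of the low 7 bits, only bits 5 and 6 (32 + 64 = 96) can push the masked
  // value past 31, so the compare collapses to a single test against 96.
  for (int x = 0; x < 128; ++x)
    assert(((x & 127) > 31) == ((x & 96) != 0));
  return 0;
}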
//===---------------------------------------------------------------------===//
Testcase:
int x(int a) { return (a&0xf0)>>4; }
Current output:
movl 4(%esp), %eax
shrl $4, %eax
andl $15, %eax
ret
Ideal output:
movzbl 4(%esp), %eax
shrl $4, %eax
ret
//===---------------------------------------------------------------------===//
Testcase:
int x(int a) { return (a & 0x80) ? 0x100 : 0; }
Current output:
testl $128, 4(%esp)
setne %al
movzbl %al, %eax
shll $8, %eax
ret
Ideal output:
movl 4(%esp), %eax
addl %eax, %eax
andl $256, %eax
ret
We generally want to fold shifted tests of a single bit into a shift+and on x86.
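
A quick check of the identity behind the ideal output: doubling moves bit 7
(0x80) up to bit 8 (0x100), and neither side depends on bits above 7, so
0..255 covers every case.

#include <cassert>

int main() {
  for (int a = 0; a < 256; ++a)
    assert(((a & 0x80) ? 0x100 : 0) == ((a + a) & 256));
  return 0;
}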
//===---------------------------------------------------------------------===//

View File

@ -215,50 +215,6 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>>
]>;
// Tail call convention (fast): One register is reserved for target address,
// namely R9
def CC_X86_64_TailCall : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<8, 8>>,
// Promote i8/i16 arguments to i32.
CCIfType<[i8, i16], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in R10.
CCIfNest<CCAssignToReg<[R10]>>,
// The first 6 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>,
CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>,
// The first 8 FP/Vector arguments are passed in XMM registers.
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
// The first 8 MMX (except for v1i64) vector arguments are passed in XMM
// registers on Darwin.
CCIfType<[v8i8, v4i16, v2i32, v2f32],
CCIfSubtarget<"isTargetDarwin()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
// The first 8 v1i64 vector arguments are passed in GPRs on Darwin.
CCIfType<[v1i64],
CCIfSubtarget<"isTargetDarwin()",
CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
// __m64 vectors get 8-byte stack slots that are 8-byte aligned.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
]>;
//===----------------------------------------------------------------------===//
// X86 C Calling Convention
//===----------------------------------------------------------------------===//

View File

@ -12,8 +12,27 @@
//===----------------------------------------------------------------------===//
#include "X86ELFWriterInfo.h"
#include "llvm/Function.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit) :
TargetELFWriterInfo(is64Bit ? EM_X86_64 : EM_386) {}
X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM)
: TargetELFWriterInfo(TM) {
bool is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
EMachine = is64Bit ? EM_X86_64 : EM_386;
}
X86ELFWriterInfo::~X86ELFWriterInfo() {}
unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const {
unsigned FnAlign = 4;
if (F->hasFnAttr(Attribute::OptimizeForSize))
FnAlign = 1;
if (F->getAlignment())
FnAlign = Log2_32(F->getAlignment());
return (1 << FnAlign);
}
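
A worked example of the log2 encoding above, with log2u standing in for
Log2_32 (the index of the highest set bit):

#include <cassert>

static unsigned log2u(unsigned x) {
  unsigned l = 0;
  while (x >>= 1) ++l;
  return l;
}

int main() {
  assert((1u << 4) == 16);         // default FnAlign of 4 -> 16 bytes
  assert((1u << 1) == 2);          // OptimizeForSize -> 2 bytes
  assert((1u << log2u(32)) == 32); // explicit 32-byte alignment round-trips
  return 0;
}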

View File

@ -20,8 +20,10 @@ namespace llvm {
class X86ELFWriterInfo : public TargetELFWriterInfo {
public:
X86ELFWriterInfo(bool is64Bit);
X86ELFWriterInfo(TargetMachine &TM);
virtual ~X86ELFWriterInfo();
virtual unsigned getFunctionAlignment(const Function *F) const;
};
} // end llvm namespace

View File

@ -171,8 +171,6 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
else if (CC == CallingConv::Fast && isTaillCall)
return CC_X86_64_TailCall;
else
return CC_X86_64_C;
}

View File

@ -944,7 +944,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
SDValue StackAdjustment = TailCall.getOperand(2);
assert(((TargetAddress.getOpcode() == ISD::Register &&
(cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R11)) ||
TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
"Expecting an global address, external symbol, or register");
@ -1171,8 +1171,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
else if (CC == CallingConv::Fast && PerformTailCallOpt)
return CC_X86_64_TailCall;
else
return CC_X86_64_C;
}
@ -1799,7 +1797,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
} else if (IsTailCall) {
unsigned Opc = Is64Bit ? X86::R9 : X86::EAX;
unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl,
DAG.getRegister(Opc, getPointerTy()),
@ -7696,7 +7694,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
SelectionDAG &DAG, MachineFrameInfo *MFI,
const TargetLowering &TLI) {
LDBase = NULL;
LastLoadedElt = -1;
LastLoadedElt = -1U;
for (unsigned i = 0; i < NumElems; ++i) {
if (N->getMaskElt(i) < 0) {
if (!LDBase)

View File

@ -822,6 +822,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
unsigned ReadyLabelId = 0;
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer is ready.
ReadyLabelId = MMI->NextLabelID();
BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
}
// Skip the callee-saved push instructions.
while (MBBI != MBB.end() &&
(MBBI->getOpcode() == X86::PUSH32r ||
@ -831,67 +838,61 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
if (NumBytes) { // Adjust stack pointer: ESP -= numbytes.
if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
// Check whether EAX is livein for this function.
bool isEAXAlive = false;
for (MachineRegisterInfo::livein_iterator
// Adjust stack pointer: ESP -= numbytes.
if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
// Check whether EAX is livein for this function.
bool isEAXAlive = false;
for (MachineRegisterInfo::livein_iterator
II = MF.getRegInfo().livein_begin(),
EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
unsigned Reg = II->first;
isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
Reg == X86::AH || Reg == X86::AL);
}
// Function prologue calls _alloca to probe the stack when allocating more
// than 4k bytes in one go. Touching the stack at 4K increments is
// necessary to ensure that the guard pages used by the OS virtual memory
// manager are allocated in correct sequence.
if (!isEAXAlive) {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("_alloca");
} else {
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
.addReg(X86::EAX, RegState::Kill);
// Allocate NumBytes-4 bytes on stack. We'll also use 4 already
// allocated bytes for EAX.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes-4);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("_alloca");
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
X86::EAX),
StackPtr, false, NumBytes-4);
MBB.insert(MBBI, MI);
}
} else {
// If there is an SUB32ri of ESP immediately before this instruction,
// merge the two. This can be the case when tail call elimination is
// enabled and the callee has more arguments than the caller.
NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
// If there is an ADD32ri or SUB32ri of ESP immediately after this
// instruction, merge the two instructions.
mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
if (NumBytes)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
unsigned Reg = II->first;
isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
Reg == X86::AH || Reg == X86::AL);
}
// Function prologue calls _alloca to probe the stack when allocating more
// than 4k bytes in one go. Touching the stack at 4K increments is necessary
// to ensure that the guard pages used by the OS virtual memory manager are
// allocated in correct sequence.
if (!isEAXAlive) {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("_alloca");
} else {
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
.addReg(X86::EAX, RegState::Kill);
// Allocate NumBytes-4 bytes on stack. We'll also use 4 already
// allocated bytes for EAX.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes - 4);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("_alloca");
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
X86::EAX),
StackPtr, false, NumBytes - 4);
MBB.insert(MBBI, MI);
}
} else if (NumBytes) {
// If there is an SUB32ri of ESP immediately before this instruction, merge
// the two. This can be the case when tail call elimination is enabled and
// the callee has more arguments than the caller.
NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
// If there is an ADD32ri or SUB32ri of ESP immediately after this
// instruction, merge the two instructions.
mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
if (NumBytes)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
}
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer is ready.
unsigned ReadyLabelId = 0;
ReadyLabelId = MMI->NextLabelID();
BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
if (needsFrameMoves)
emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
}
}
void X86RegisterInfo::emitEpilogue(MachineFunction &MF,

View File

@ -350,6 +350,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
, MaxInlineSizeThreshold(128)
, Is64Bit(is64Bit)
, TargetType(isELF) { // Default to ELF unless otherwise specified.
// default to hard float ABI
if (FloatABIType == FloatABI::Default)
FloatABIType = FloatABI::Hard;
// Determine default and user specified characteristics
if (!FS.empty()) {

View File

@ -133,8 +133,7 @@ X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
DataLayout(Subtarget.getDataLayout()),
FrameInfo(TargetFrameInfo::StackGrowsDown,
Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
InstrInfo(*this), JITInfo(*this), TLInfo(*this),
ELFWriterInfo(Subtarget.is64Bit()) {
InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
DefRelocModel = getRelocationModel();
// FIXME: Correctly select PIC model for Win64 stuff
if (getRelocationModel() == Reloc::Default) {

View File

@ -127,17 +127,8 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// Second check: make sure that all callers are direct callers. We can't
// transform functions that have indirect callers.
for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
UI != E; ++UI) {
CallSite CS = CallSite::get(*UI);
if (!CS.getInstruction()) // "Taking the address" of the function
return false;
// Ensure that this call site is CALLING the function, not passing it as
// an argument.
if (!CS.isCallee(UI))
return false;
}
if (F->hasAddressTaken())
return false;
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.

View File

@ -175,15 +175,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
// Ensure that the function is only directly called.
for (Value::use_iterator I = Fn.use_begin(), E = Fn.use_end(); I != E; ++I) {
// If this use is anything other than a call site, give up.
CallSite CS = CallSite::get(*I);
Instruction *TheCall = CS.getInstruction();
if (!TheCall) return false; // Not a direct call site?
// The addr of this function is passed to the call.
if (!CS.isCallee(I)) return false;
}
if (Fn.hasAddressTaken())
return false;
// Okay, we know we can transform this function if safe. Scan its body
// looking for calls to llvm.vastart.

View File

@ -47,7 +47,6 @@ namespace {
void GlobalIsNeeded(GlobalValue *GV);
void MarkUsedGlobalsAsNeeded(Constant *C);
bool SafeToDestroyConstant(Constant* C);
bool RemoveUnusedGlobalValue(GlobalValue &GV);
};
}
@ -211,17 +210,3 @@ bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
GV.removeDeadConstantUsers();
return GV.use_empty();
}
// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
// by constants itself. Note that constants cannot be cyclic, so this test is
// pretty easy to implement recursively.
//
bool GlobalDCE::SafeToDestroyConstant(Constant *C) {
for (Value::use_iterator I = C->use_begin(), E = C->use_end(); I != E; ++I)
if (Constant *User = dyn_cast<Constant>(*I)) {
if (!SafeToDestroyConstant(User)) return false;
} else {
return false;
}
return true;
}

View File

@ -136,16 +136,16 @@ struct VISIBILITY_HIDDEN GlobalStatus {
}
/// ConstantIsDead - Return true if the specified constant is (transitively)
/// dead. The constant may be used by other constants (e.g. constant arrays and
/// constant exprs) as long as they are dead, but it cannot be used by anything
/// else.
static bool ConstantIsDead(Constant *C) {
// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
// by other constants. Note that constants cannot be cyclic, so this test is
// pretty easy to implement recursively.
//
static bool SafeToDestroyConstant(Constant *C) {
if (isa<GlobalValue>(C)) return false;
for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI)
if (Constant *CU = dyn_cast<Constant>(*UI)) {
if (!ConstantIsDead(CU)) return false;
if (!SafeToDestroyConstant(CU)) return false;
} else
return false;
return true;
@ -233,7 +233,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
} else if (Constant *C = dyn_cast<Constant>(*UI)) {
GS.HasNonInstructionUser = true;
// We might have a dead and dangling constant hanging off of here.
if (!ConstantIsDead(C))
if (!SafeToDestroyConstant(C))
return true;
} else {
GS.HasNonInstructionUser = true;
@ -338,7 +338,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
} else if (Constant *C = dyn_cast<Constant>(U)) {
// If we have a chain of dead constantexprs or other things dangling from
// us, and if they are all dead, nuke them without remorse.
if (ConstantIsDead(C)) {
if (SafeToDestroyConstant(C)) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
CleanupConstantGlobalUsers(V, Init);
@ -354,7 +354,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
static bool isSafeSROAElementUse(Value *V) {
// We might have a dead and dangling constant hanging off of here.
if (Constant *C = dyn_cast<Constant>(V))
return ConstantIsDead(C);
return SafeToDestroyConstant(C);
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
@ -1769,22 +1769,6 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return false;
}
/// OnlyCalledDirectly - Return true if the specified function is only called
/// directly. In other words, its address is never taken.
static bool OnlyCalledDirectly(Function *F) {
for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
Instruction *User = dyn_cast<Instruction>(*UI);
if (!User) return false;
if (!isa<CallInst>(User) && !isa<InvokeInst>(User)) return false;
// See if the function address is passed as an argument.
for (User::op_iterator i = User->op_begin() + 1, e = User->op_end();
i != e; ++i)
if (*i == F) return false;
}
return true;
}
/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
/// function, changing them to FastCC.
static void ChangeCalleesToFastCall(Function *F) {
@ -1830,7 +1814,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
++NumFnDeleted;
} else if (F->hasLocalLinkage()) {
if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
OnlyCalledDirectly(F)) {
!F->hasAddressTaken()) {
// If this function has C calling conventions, is not a varargs
// function, and is only called directly, promote it to use the Fast
// calling convention.
@ -1841,7 +1825,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
}
if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
OnlyCalledDirectly(F)) {
!F->hasAddressTaken()) {
// The function is not used by a trampoline intrinsic, so it is safe
// to remove the 'nest' attribute.
RemoveNestAttribute(F);

View File

@ -9,10 +9,6 @@
//
// This pass looks for equivalent functions that are mergable and folds them.
//
// A Function will not be analyzed if:
// * it is overridable at runtime (except for weak linkage), or
// * it is used by anything other than the callee parameter of a call/invoke
//
// A hash is computed from the function, based on its type and number of
// basic blocks.
//
@ -24,8 +20,6 @@
// When a match is found, the functions are folded. We can only fold two
// functions when we know that the definition of one of them is not
// overridable.
// * fold a function marked internal by replacing all of its users.
// * fold extern or weak functions by replacing them with a global alias
//
//===----------------------------------------------------------------------===//
//
@ -48,6 +42,7 @@
#define DEBUG_TYPE "mergefunc"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Constants.h"
#include "llvm/InlineAsm.h"
@ -62,7 +57,6 @@
using namespace llvm;
STATISTIC(NumFunctionsMerged, "Number of functions merged");
STATISTIC(NumMergeFails, "Number of identical function pairings not merged");
namespace {
struct VISIBILITY_HIDDEN MergeFunctions : public ModulePass {
@ -81,16 +75,169 @@ ModulePass *llvm::createMergeFunctionsPass() {
return new MergeFunctions();
}
// ===----------------------------------------------------------------------===
// Comparison of functions
// ===----------------------------------------------------------------------===
static unsigned long hash(const Function *F) {
return F->size() ^ reinterpret_cast<unsigned long>(F->getType());
//return F->size() ^ F->arg_size() ^ F->getReturnType();
const FunctionType *FTy = F->getFunctionType();
FoldingSetNodeID ID;
ID.AddInteger(F->size());
ID.AddInteger(F->getCallingConv());
ID.AddBoolean(F->hasGC());
ID.AddBoolean(FTy->isVarArg());
ID.AddInteger(FTy->getReturnType()->getTypeID());
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
ID.AddInteger(FTy->getParamType(i)->getTypeID());
return ID.ComputeHash();
}
/// IgnoreBitcasts - given a bitcast, returns the first non-bitcast value found
/// by walking the chain of bitcast operands; any other value is returned
/// unchanged.
static Value* IgnoreBitcasts(Value *V) {
while (BitCastInst *BC = dyn_cast<BitCastInst>(V))
V = BC->getOperand(0);
return V;
}
/// isEquivalentType - any two pointers in the same address space are
/// equivalent. Otherwise, standard type equivalence rules apply.
static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
if (Ty1 == Ty2)
return true;
if (Ty1->getTypeID() != Ty2->getTypeID())
return false;
switch(Ty1->getTypeID()) {
case Type::VoidTyID:
case Type::FloatTyID:
case Type::DoubleTyID:
case Type::X86_FP80TyID:
case Type::FP128TyID:
case Type::PPC_FP128TyID:
case Type::LabelTyID:
case Type::MetadataTyID:
return true;
case Type::IntegerTyID:
case Type::OpaqueTyID:
// Ty1 == Ty2 would have returned true earlier.
return false;
default:
assert(0 && "Unknown type!");
return false;
case Type::PointerTyID: {
const PointerType *PTy1 = cast<PointerType>(Ty1);
const PointerType *PTy2 = cast<PointerType>(Ty2);
return PTy1->getAddressSpace() == PTy2->getAddressSpace();
}
case Type::StructTyID: {
const StructType *STy1 = cast<StructType>(Ty1);
const StructType *STy2 = cast<StructType>(Ty2);
if (STy1->getNumElements() != STy2->getNumElements())
return false;
if (STy1->isPacked() != STy2->isPacked())
return false;
for (unsigned i = 0, e = STy1->getNumElements(); i != e; ++i) {
if (!isEquivalentType(STy1->getElementType(i), STy2->getElementType(i)))
return false;
}
return true;
}
case Type::FunctionTyID: {
const FunctionType *FTy1 = cast<FunctionType>(Ty1);
const FunctionType *FTy2 = cast<FunctionType>(Ty2);
if (FTy1->getNumParams() != FTy2->getNumParams() ||
FTy1->isVarArg() != FTy2->isVarArg())
return false;
if (!isEquivalentType(FTy1->getReturnType(), FTy2->getReturnType()))
return false;
for (unsigned i = 0, e = FTy1->getNumParams(); i != e; ++i) {
if (!isEquivalentType(FTy1->getParamType(i), FTy2->getParamType(i)))
return false;
}
return true;
}
case Type::ArrayTyID:
case Type::VectorTyID: {
const SequentialType *STy1 = cast<SequentialType>(Ty1);
const SequentialType *STy2 = cast<SequentialType>(Ty2);
return isEquivalentType(STy1->getElementType(), STy2->getElementType());
}
}
}
/// isEquivalentOperation - determine whether the two operations are the same
/// except that pointer-to-A and pointer-to-B are equivalent. This should be
/// kept in sync with Instruction::isSameOperationAs.
static bool
isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
if (I1->getOpcode() != I2->getOpcode() ||
I1->getNumOperands() != I2->getNumOperands() ||
!isEquivalentType(I1->getType(), I2->getType()))
return false;
// We have two instructions of identical opcode and #operands. Check to see
// if all operands are the same type
for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i)
if (!isEquivalentType(I1->getOperand(i)->getType(),
I2->getOperand(i)->getType()))
return false;
// Check special state that is a part of some instructions.
if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
LI->getAlignment() == cast<LoadInst>(I2)->getAlignment();
if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
SI->getAlignment() == cast<StoreInst>(I2)->getAlignment();
if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
if (const CallInst *CI = dyn_cast<CallInst>(I1))
return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() &&
CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
CI->getAttributes().getRawPointer() ==
cast<CallInst>(I2)->getAttributes().getRawPointer();
if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
CI->getAttributes().getRawPointer() ==
cast<InvokeInst>(I2)->getAttributes().getRawPointer();
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) {
if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices())
return false;
for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i])
return false;
return true;
}
if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) {
if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices())
return false;
for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i])
return false;
return true;
}
return true;
}
static bool compare(const Value *V, const Value *U) {
assert(!isa<BasicBlock>(V) && !isa<BasicBlock>(U) &&
"Must not compare basic blocks.");
assert(V->getType() == U->getType() &&
assert(isEquivalentType(V->getType(), U->getType()) &&
"Two of the same operation have operands of different type.");
// TODO: If the constant is an expression of F, we should accept that it's
@ -117,19 +264,39 @@ static bool compare(const Value *V, const Value *U) {
static bool equals(const BasicBlock *BB1, const BasicBlock *BB2,
DenseMap<const Value *, const Value *> &ValueMap,
DenseMap<const Value *, const Value *> &SpeculationMap) {
// Specutively add it anyways. If it's false, we'll notice a difference later, and
// this won't matter.
// Speculatively add it anyways. If it's false, we'll notice a difference
// later, and this won't matter.
ValueMap[BB1] = BB2;
BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
do {
if (!FI->isSameOperationAs(const_cast<Instruction *>(&*GI)))
if (isa<BitCastInst>(FI)) {
++FI;
continue;
}
if (isa<BitCastInst>(GI)) {
++GI;
continue;
}
if (!isEquivalentOperation(FI, GI))
return false;
if (FI->getNumOperands() != GI->getNumOperands())
return false;
if (isa<GetElementPtrInst>(FI)) {
const GetElementPtrInst *GEPF = cast<GetElementPtrInst>(FI);
const GetElementPtrInst *GEPG = cast<GetElementPtrInst>(GI);
if (GEPF->hasAllZeroIndices() && GEPG->hasAllZeroIndices()) {
// It's effectively a bitcast.
++FI, ++GI;
continue;
}
// TODO: we only really care about the elements before the index
if (FI->getOperand(0)->getType() != GI->getOperand(0)->getType())
return false;
}
if (ValueMap[FI] == GI) {
++FI, ++GI;
@ -140,8 +307,8 @@ static bool equals(const BasicBlock *BB1, const BasicBlock *BB2,
return false;
for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
Value *OpF = FI->getOperand(i);
Value *OpG = GI->getOperand(i);
Value *OpF = IgnoreBitcasts(FI->getOperand(i));
Value *OpG = IgnoreBitcasts(GI->getOperand(i));
if (ValueMap[OpF] == OpG)
continue;
@ -149,10 +316,8 @@ static bool equals(const BasicBlock *BB1, const BasicBlock *BB2,
if (ValueMap[OpF] != NULL)
return false;
assert(OpF->getType() == OpG->getType() &&
"Two of the same operation has operands of different type.");
if (OpF->getValueID() != OpG->getValueID())
if (OpF->getValueID() != OpG->getValueID() ||
!isEquivalentType(OpF->getType(), OpG->getType()))
return false;
if (isa<PHINode>(FI)) {
@ -203,14 +368,15 @@ static bool equals(const Function *F, const Function *G) {
if (F->hasSection() && F->getSection() != G->getSection())
return false;
if (F->isVarArg() != G->isVarArg())
return false;
// TODO: if it's internal and only used in direct calls, we could handle this
// case too.
if (F->getCallingConv() != G->getCallingConv())
return false;
// TODO: We want to permit cases where two functions take T* and S* but
// only load or store them into T** and S**.
if (F->getType() != G->getType())
if (!isEquivalentType(F->getFunctionType(), G->getFunctionType()))
return false;
DenseMap<const Value *, const Value *> ValueMap;
@ -237,88 +403,212 @@ static bool equals(const Function *F, const Function *G) {
return true;
}
static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
if (FnVec[i]->mayBeOverridden() && !FnVec[j]->mayBeOverridden())
std::swap(FnVec[i], FnVec[j]);
// ===----------------------------------------------------------------------===
// Folding of functions
// ===----------------------------------------------------------------------===
// Cases:
// * F is external strong, G is external strong:
// turn G into a thunk to F (1)
// * F is external strong, G is external weak:
// turn G into a thunk to F (1)
// * F is external weak, G is external weak:
// unfoldable
// * F is external strong, G is internal:
// address of G taken:
// turn G into a thunk to F (1)
// address of G not taken:
// make G an alias to F (2)
// * F is internal, G is external weak
// address of F is taken:
// turn G into a thunk to F (1)
// address of F is not taken:
// make G an alias of F (2)
// * F is internal, G is internal:
// address of F and G are taken:
// turn G into a thunk to F (1)
// address of G is not taken:
// make G an alias to F (2)
//
// An alias requires linkage == (external,local,weak); otherwise fall back to
// creating a thunk.
// external means 'externally visible': linkage != (internal,private)
// internal means linkage == (internal,private)
// weak means the linkage mayBeOverridden
// being external implies that the address is taken
//
// 1. turn G into a thunk to F
// 2. make G an alias to F
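
A condensed standalone sketch of this table (invented names; not the code
that follows). Note that the real fold() first swaps F and G so that F is
never the weaker function, and it refines the weak/weak case into two thunks
to a shared internal body rather than giving up.

#include <cassert>

enum Category { CatExternalStrong, CatExternalWeak, CatInternal };
enum Action { MakeThunk, MakeAlias, Unfoldable };

static Action chooseFold(Category CatF, Category CatG,
                         bool AddrTakenF, bool AddrTakenG) {
  if (CatF == CatExternalWeak && CatG == CatExternalWeak)
    return Unfoldable;             // per the table; fold() does better
  if (CatF == CatExternalStrong)
    return (CatG == CatInternal && !AddrTakenG) ? MakeAlias : MakeThunk;
  if (CatF == CatInternal && CatG == CatExternalWeak)
    return AddrTakenF ? MakeThunk : MakeAlias;
  // Both internal: an alias is safe only when G's address is never taken.
  return AddrTakenG ? MakeThunk : MakeAlias;
}

int main() {
  assert(chooseFold(CatExternalStrong, CatExternalWeak, false, true) == MakeThunk);
  assert(chooseFold(CatExternalStrong, CatInternal, false, false) == MakeAlias);
  assert(chooseFold(CatExternalWeak, CatExternalWeak, false, false) == Unfoldable);
  return 0;
}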
enum LinkageCategory {
ExternalStrong,
ExternalWeak,
Internal
};
static LinkageCategory categorize(const Function *F) {
switch (F->getLinkage()) {
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
return Internal;
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::ExternalWeakLinkage:
return ExternalWeak;
case GlobalValue::ExternalLinkage:
case GlobalValue::AvailableExternallyLinkage:
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
case GlobalValue::AppendingLinkage:
case GlobalValue::DLLImportLinkage:
case GlobalValue::DLLExportLinkage:
case GlobalValue::GhostLinkage:
case GlobalValue::CommonLinkage:
return ExternalStrong;
}
assert(0 && "Unknown LinkageType.");
return ExternalWeak;
}
static void ThunkGToF(Function *F, Function *G) {
Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
G->getParent());
BasicBlock *BB = BasicBlock::Create("", NewG);
std::vector<Value *> Args;
unsigned i = 0;
const FunctionType *FFTy = F->getFunctionType();
for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
AI != AE; ++AI) {
if (FFTy->getParamType(i) == AI->getType())
Args.push_back(AI);
else {
Value *BCI = new BitCastInst(AI, FFTy->getParamType(i), "", BB);
Args.push_back(BCI);
}
++i;
}
CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB);
CI->setTailCall();
CI->setCallingConv(F->getCallingConv());
if (NewG->getReturnType() == Type::VoidTy) {
ReturnInst::Create(BB);
} else if (CI->getType() != NewG->getReturnType()) {
Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB);
ReturnInst::Create(BCI, BB);
} else {
ReturnInst::Create(CI, BB);
}
NewG->copyAttributesFrom(G);
NewG->takeName(G);
G->replaceAllUsesWith(NewG);
G->eraseFromParent();
// TODO: look at direct callers to G and make them all direct callers to F.
}
static void AliasGToF(Function *F, Function *G) {
if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage())
return ThunkGToF(F, G);
GlobalAlias *GA = new GlobalAlias(
G->getType(), G->getLinkage(), "",
ConstantExpr::getBitCast(F, G->getType()), G->getParent());
F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
GA->takeName(G);
GA->setVisibility(G->getVisibility());
G->replaceAllUsesWith(GA);
G->eraseFromParent();
}
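// Hypothetical source-level picture of AliasGToF (GNU extension, ELF-style
// targets, illustration only): unlike a thunk, an alias emits no code at all;
// G simply becomes a second symbol for F's machine code.
extern "C" int alias_example_f(int x) { return x - 1; }
extern "C" int alias_example_g(int x) __attribute__((alias("alias_example_f")));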
static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
Function *F = FnVec[i];
Function *G = FnVec[j];
if (!F->mayBeOverridden()) {
if (G->hasLocalLinkage()) {
F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
G->replaceAllUsesWith(F);
G->eraseFromParent();
++NumFunctionsMerged;
return true;
}
LinkageCategory catF = categorize(F);
LinkageCategory catG = categorize(G);
if (G->hasExternalLinkage() || G->hasWeakLinkage()) {
GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
F, G->getParent());
F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
GA->takeName(G);
GA->setVisibility(G->getVisibility());
G->replaceAllUsesWith(GA);
G->eraseFromParent();
++NumFunctionsMerged;
return true;
}
if (catF == ExternalWeak || (catF == Internal && catG == ExternalStrong)) {
std::swap(FnVec[i], FnVec[j]);
std::swap(F, G);
std::swap(catF, catG);
}
if (F->hasWeakLinkage() && G->hasWeakLinkage()) {
GlobalAlias *GA_F = new GlobalAlias(F->getType(), F->getLinkage(), "",
0, F->getParent());
GA_F->takeName(F);
GA_F->setVisibility(F->getVisibility());
F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
F->replaceAllUsesWith(GA_F);
F->setName("folded." + GA_F->getName());
F->setLinkage(GlobalValue::ExternalLinkage);
GA_F->setAliasee(F);
switch (catF) {
case ExternalStrong:
switch (catG) {
case ExternalStrong:
case ExternalWeak:
ThunkGToF(F, G);
break;
case Internal:
if (G->hasAddressTaken())
ThunkGToF(F, G);
else
AliasGToF(F, G);
break;
}
break;
GlobalAlias *GA_G = new GlobalAlias(G->getType(), G->getLinkage(), "",
F, G->getParent());
GA_G->takeName(G);
GA_G->setVisibility(G->getVisibility());
G->replaceAllUsesWith(GA_G);
G->eraseFromParent();
case ExternalWeak: {
assert(catG == ExternalWeak);
++NumFunctionsMerged;
return true;
// Make them both thunks to the same internal function.
F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
F->getParent());
H->copyAttributesFrom(F);
H->takeName(F);
F->replaceAllUsesWith(H);
ThunkGToF(F, G);
ThunkGToF(F, H);
F->setLinkage(GlobalValue::InternalLinkage);
} break;
case Internal:
switch (catG) {
case ExternalStrong:
assert(0);
// fall-through
case ExternalWeak:
if (F->hasAddressTaken())
ThunkGToF(F, G);
else
AliasGToF(F, G);
break;
case Internal: {
bool addrTakenF = F->hasAddressTaken();
bool addrTakenG = G->hasAddressTaken();
if (!addrTakenF && addrTakenG) {
std::swap(FnVec[i], FnVec[j]);
std::swap(F, G);
std::swap(addrTakenF, addrTakenG);
}
if (addrTakenF && addrTakenG) {
ThunkGToF(F, G);
} else {
assert(!addrTakenG);
AliasGToF(F, G);
}
} break;
}
break;
}
DOUT << "Failed on " << F->getName() << " and " << G->getName() << "\n";
++NumMergeFails;
return false;
++NumFunctionsMerged;
return true;
}
static bool hasAddressTaken(User *U) {
for (User::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) {
User *Use = *I;
// 'call (bitcast @F to ...)' happens a lot.
while (isa<ConstantExpr>(Use) && Use->hasOneUse()) {
Use = *Use->use_begin();
}
if (isa<ConstantExpr>(Use)) {
if (hasAddressTaken(Use))
return true;
}
if (!isa<CallInst>(Use) && !isa<InvokeInst>(Use))
return true;
// Make sure we aren't passing U as a parameter to call instead of the
// callee.
if (CallSite(cast<Instruction>(Use)).hasArgument(U))
return true;
}
return false;
}
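// Hypothetical C++ examples of the distinction this predicate draws:
int addr_example_callee(int x) { return x; }
int addr_example_direct() { return addr_example_callee(1); } // direct call: not taken
int (*addr_example_ptr)(int) = addr_example_callee;          // stored: address taken
int addr_example_apply(int (*Fn)(int)) { return Fn(2); }
int addr_example_passed() {
  return addr_example_apply(addr_example_callee);            // passed as data: taken
}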
// ===----------------------------------------------------------------------===
// Pass definition
// ===----------------------------------------------------------------------===
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
@ -329,25 +619,19 @@ bool MergeFunctions::runOnModule(Module &M) {
if (F->isDeclaration() || F->isIntrinsic())
continue;
if (!F->hasLocalLinkage() && !F->hasExternalLinkage() &&
!F->hasWeakLinkage())
continue;
if (hasAddressTaken(F))
continue;
FnMap[hash(F)].push_back(F);
}
// TODO: instead of running in a loop, we could also fold functions in callgraph
// order. Constructing the CFG probably isn't cheaper than just running in a loop.
// TODO: instead of running in a loop, we could also fold functions in
// callgraph order. Constructing the CFG probably isn't cheaper than just
// running in a loop, unless it happened to already be available.
bool LocalChanged;
do {
LocalChanged = false;
DOUT << "size: " << FnMap.size() << "\n";
for (std::map<unsigned long, std::vector<Function *> >::iterator
I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
DOUT << "size: " << FnMap.size() << "\n";
std::vector<Function *> &FnVec = I->second;
DOUT << "hash (" << I->first << "): " << FnVec.size() << "\n";


@ -0,0 +1,171 @@
//===- PartialInlining.cpp - Inline parts of functions --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs partial inlining, typically by inlining an if statement
// that surrounds the body of the function.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "partialinlining"
#include "llvm/Transforms/IPO.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/FunctionUtils.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CFG.h"
using namespace llvm;
namespace {
struct VISIBILITY_HIDDEN PartialInliner : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
static char ID; // Pass identification, replacement for typeid
PartialInliner() : ModulePass(&ID) {}
bool runOnModule(Module& M);
private:
Function* unswitchFunction(Function* F);
};
}
char PartialInliner::ID = 0;
static RegisterPass<PartialInliner> X("partial-inliner", "Partial Inliner");
ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
Function* PartialInliner::unswitchFunction(Function* F) {
// First, verify that this function is an unswitching candidate...
BasicBlock* entryBlock = F->begin();
if (!isa<BranchInst>(entryBlock->getTerminator()))
return 0;
BasicBlock* returnBlock = 0;
BasicBlock* nonReturnBlock = 0;
unsigned returnCount = 0;
for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
SI != SE; ++SI)
if (isa<ReturnInst>((*SI)->getTerminator())) {
returnBlock = *SI;
returnCount++;
} else
nonReturnBlock = *SI;
if (returnCount != 1)
return 0;
// Clone the function, so that we can hack away on it.
DenseMap<const Value*, Value*> ValueMap;
Function* duplicateFunction = CloneFunction(F, ValueMap);
duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
F->getParent()->getFunctionList().push_back(duplicateFunction);
BasicBlock* newEntryBlock = cast<BasicBlock>(ValueMap[entryBlock]);
BasicBlock* newReturnBlock = cast<BasicBlock>(ValueMap[returnBlock]);
BasicBlock* newNonReturnBlock = cast<BasicBlock>(ValueMap[nonReturnBlock]);
// Go ahead and update all uses to the duplicate, so that we can just
// use the inliner functionality when we're done hacking.
F->replaceAllUsesWith(duplicateFunction);
// Special hackery is needed with PHI nodes that have inputs from more than
// one extracted block. For simplicity, just split the PHIs into a two-level
// sequence of PHIs, some of which will go in the extracted region, and some
// of which will go outside.
BasicBlock* preReturn = newReturnBlock;
newReturnBlock = newReturnBlock->splitBasicBlock(
newReturnBlock->getFirstNonPHI());
BasicBlock::iterator I = preReturn->begin();
BasicBlock::iterator Ins = newReturnBlock->begin();
while (I != preReturn->end()) {
PHINode* OldPhi = dyn_cast<PHINode>(I);
if (!OldPhi) break;
PHINode* retPhi = PHINode::Create(OldPhi->getType(), "", Ins);
OldPhi->replaceAllUsesWith(retPhi);
Ins = newReturnBlock->getFirstNonPHI();
retPhi->addIncoming(I, preReturn);
retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
newEntryBlock);
OldPhi->removeIncomingValue(newEntryBlock);
++I;
}
newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);
// Gather up the blocks that we're going to extract.
std::vector<BasicBlock*> toExtract;
toExtract.push_back(newNonReturnBlock);
for (Function::iterator FI = duplicateFunction->begin(),
FE = duplicateFunction->end(); FI != FE; ++FI)
if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
&*FI != newNonReturnBlock)
toExtract.push_back(FI);
// The CodeExtractor needs a dominator tree.
DominatorTree DT;
DT.runOnFunction(*duplicateFunction);
// Extract the body of the if.
Function* extractedFunction = ExtractCodeRegion(DT, toExtract);
// Inline the top-level if test into all callers.
std::vector<User*> Users(duplicateFunction->use_begin(),
duplicateFunction->use_end());
for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
UI != UE; ++UI)
if (CallInst* CI = dyn_cast<CallInst>(*UI))
InlineFunction(CI);
else if (InvokeInst* II = dyn_cast<InvokeInst>(*UI))
InlineFunction(II);
// Ditch the duplicate, since we're done with it, and rewrite all remaining
// users (function pointers, etc.) back to the original function.
duplicateFunction->replaceAllUsesWith(F);
duplicateFunction->eraseFromParent();
return extractedFunction;
}
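// Hypothetical C++ shape of a function this routine accepts: the entry block
// ends in a conditional branch, and exactly one successor returns immediately.
// The code below the test is what ExtractCodeRegion pulls out; only the cheap
// entry test then gets inlined into each caller.
int partial_inline_candidate(int n) {
  if (n <= 0)                   // entry test with an early-return successor
    return 0;
  int acc = 0;                  // this half becomes the extracted function
  for (int i = 0; i < n; ++i)
    acc += i;
  return acc;
}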
bool PartialInliner::runOnModule(Module& M) {
std::vector<Function*> worklist;
worklist.reserve(M.size());
for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
if (!FI->use_empty() && !FI->isDeclaration())
worklist.push_back(&*FI);
bool changed = false;
while (!worklist.empty()) {
Function* currFunc = worklist.back();
worklist.pop_back();
if (currFunc->use_empty()) continue;
bool recursive = false;
for (Function::use_iterator UI = currFunc->use_begin(),
UE = currFunc->use_end(); UI != UE; ++UI)
if (Instruction* I = dyn_cast<Instruction>(UI))
if (I->getParent()->getParent() == currFunc) {
recursive = true;
break;
}
if (recursive) continue;
if (Function* newFunc = unswitchFunction(currFunc)) {
worklist.push_back(newFunc);
changed = true;
}
}
return changed;
}


@ -168,7 +168,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
// Expand the code for the iteration count into the preheader of the loop.
BasicBlock *Preheader = L->getLoopPreheader();
Value *ExitCnt = Rewriter.expandCodeFor(RHS, CmpIndVar->getType(),
Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(),
Preheader->getTerminator());
// Insert a new icmp_ne or icmp_eq instruction before the branch.
@ -392,10 +392,31 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// in this loop, insert a canonical induction variable of the largest size.
Value *IndVar = 0;
if (NeedCannIV) {
// Check to see if the loop already has a canonical-looking induction
// variable. If one is present and it's wider than the planned canonical
// induction variable, temporarily remove it, so that the Rewriter
// doesn't attempt to reuse it.
PHINode *OldCannIV = L->getCanonicalInductionVariable();
if (OldCannIV) {
if (SE->getTypeSizeInBits(OldCannIV->getType()) >
SE->getTypeSizeInBits(LargestType))
OldCannIV->removeFromParent();
else
OldCannIV = 0;
}
IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
++NumInserted;
Changed = true;
DOUT << "INDVARS: New CanIV: " << *IndVar;
// Now that the official induction variable is established, reinsert
// the old canonical-looking variable after it so that the IR remains
// consistent. It will be deleted as part of the dead-PHI deletion at
// the end of the pass.
if (OldCannIV)
OldCannIV->insertAfter(cast<Instruction>(IndVar));
}
// If we have a trip count expression, rewrite the loop's exit condition
@ -459,8 +480,8 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
E = List.end(); UI != E; ++UI) {
SCEVHandle Offset = UI->getOffset();
Value *Op = UI->getOperandValToReplace();
const Type *UseTy = Op->getType();
Instruction *User = UI->getUser();
bool isSigned = UI->isSigned();
// Compute the final addrec to expand into code.
SCEVHandle AR = IU->getReplacementExpr(*UI);
@ -471,7 +492,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
// Expand loop-invariant values in the loop preheader. They will
// be sunk to the exit block later, if possible.
NewVal =
Rewriter.expandCodeFor(AR, LargestType,
Rewriter.expandCodeFor(AR, UseTy,
L->getLoopPreheader()->getTerminator());
Rewriter.setInsertionPoint(I);
++NumReplaced;
@ -485,74 +506,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
if (!Stride->isLoopInvariant(L))
continue;
const Type *IVTy = Offset->getType();
const Type *UseTy = Op->getType();
// Promote the Offset and Stride up to the canonical induction
// variable's bit width.
SCEVHandle PromotedOffset = Offset;
SCEVHandle PromotedStride = Stride;
if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) {
// It doesn't matter for correctness whether zero or sign extension
// is used here, since the value is truncated away below, but if the
// value is signed, sign extension is more likely to be folded.
if (isSigned) {
PromotedOffset = SE->getSignExtendExpr(PromotedOffset, LargestType);
PromotedStride = SE->getSignExtendExpr(PromotedStride, LargestType);
} else {
PromotedOffset = SE->getZeroExtendExpr(PromotedOffset, LargestType);
// If the stride is obviously negative, use sign extension to
// produce things like x-1 instead of x+255.
if (isa<SCEVConstant>(PromotedStride) &&
cast<SCEVConstant>(PromotedStride)
->getValue()->getValue().isNegative())
PromotedStride = SE->getSignExtendExpr(PromotedStride,
LargestType);
else
PromotedStride = SE->getZeroExtendExpr(PromotedStride,
LargestType);
}
}
// Create the SCEV representing the offset from the canonical
// induction variable, still in the canonical induction variable's
// type, so that all expanded arithmetic is done in the same type.
SCEVHandle NewAR = SE->getAddRecExpr(SE->getIntegerSCEV(0, LargestType),
PromotedStride, L);
// Add the PromotedOffset as a separate step, because it may not be
// loop-invariant.
NewAR = SE->getAddExpr(NewAR, PromotedOffset);
// Expand the addrec into instructions.
Value *V = Rewriter.expandCodeFor(NewAR);
// Insert an explicit cast if necessary to truncate the value
// down to the original stride type. This is done outside of
// SCEVExpander because in SCEV expressions, a truncate of an
// addrec is always folded.
if (LargestType != IVTy) {
if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType))
NewAR = SE->getTruncateExpr(NewAR, IVTy);
if (Rewriter.isInsertedExpression(NewAR))
V = Rewriter.expandCodeFor(NewAR);
else {
V = Rewriter.InsertCastOfTo(CastInst::getCastOpcode(V, false,
IVTy, false),
V, IVTy);
assert(!isa<SExtInst>(V) && !isa<ZExtInst>(V) &&
"LargestType wasn't actually the largest type!");
// Force the rewriter to use this trunc whenever this addrec
// appears so that it doesn't insert new phi nodes or
// arithmetic in a different type.
Rewriter.addInsertedValue(V, NewAR);
}
}
DOUT << "INDVARS: Made offset-and-trunc IV for offset "
<< *IVTy << " " << *Offset << ": ";
DEBUG(WriteAsOperand(*DOUT, V, false));
DOUT << "\n";
// Now expand it into actual Instructions and patch it into place.
NewVal = Rewriter.expandCodeFor(AR, UseTy);
}


@ -2608,21 +2608,6 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
return BinaryOperator::CreateFNeg(Op1I->getOperand(0), I.getName());
}
if (Op1I->hasOneUse()) {
// Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
// is not used by anyone else...
//
if (Op1I->getOpcode() == Instruction::FSub) {
// Swap the two operands of the subexpr...
Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
Op1I->setOperand(0, IIOp1);
Op1I->setOperand(1, IIOp0);
// Create the new top level fadd instruction...
return BinaryOperator::CreateFAdd(Op0, Op1);
}
}
}
return 0;
@ -11824,7 +11809,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (SI.isVolatile()) return 0; // Don't hack volatile stores.
// store X, null -> turns into 'unreachable' in SimplifyCFG
if (isa<ConstantPointerNull>(Ptr)) {
if (isa<ConstantPointerNull>(Ptr) &&
cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) {
if (!isa<UndefValue>(Val)) {
SI.setOperand(0, UndefValue::get(Val->getType()));
if (Instruction *U = dyn_cast<Instruction>(Val))


@ -125,13 +125,17 @@ static bool MarkAliveBlocks(BasicBlock *BB,
}
}
if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
if (isa<ConstantPointerNull>(SI->getOperand(1)) ||
isa<UndefValue>(SI->getOperand(1))) {
if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
Value *Ptr = SI->getOperand(1);
if (isa<UndefValue>(Ptr) ||
(isa<ConstantPointerNull>(Ptr) &&
cast<PointerType>(Ptr->getType())->getAddressSpace() == 0)) {
ChangeToUnreachable(SI);
Changed = true;
break;
}
}
}
// Turn invokes that call 'nounwind' functions into ordinary calls.


@ -364,4 +364,15 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, const Type **Tys,
#include "llvm/Intrinsics.gen"
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
/// hasAddressTaken - returns true if there are any uses of this function
/// other than direct calls or invokes to it.
bool Function::hasAddressTaken() const {
for (Value::use_const_iterator I = use_begin(), E = use_end(); I != E; ++I) {
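// In this representation the callee is operand 0 of a call/invoke, so a use
// in any other operand slot means the function is being passed as data,
// i.e. its address escapes.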
if (I.getOperandNo() != 0 ||
(!isa<CallInst>(*I) && !isa<InvokeInst>(*I)))
return true;
}
return false;
}
// vim: sw=2 ai


@ -218,9 +218,12 @@ bool Instruction::isIdenticalTo(const Instruction *I) const {
}
// isSameOperationAs
// This should be kept in sync with isEquivalentOperation in
// lib/Transforms/IPO/MergeFunctions.cpp.
bool Instruction::isSameOperationAs(const Instruction *I) const {
if (getOpcode() != I->getOpcode() || getType() != I->getType() ||
getNumOperands() != I->getNumOperands())
if (getOpcode() != I->getOpcode() ||
getNumOperands() != I->getNumOperands() ||
getType() != I->getType())
return false;
// We have two instructions of identical opcode and #operands. Check to see


@ -276,8 +276,8 @@ namespace {
int VT, unsigned ArgNo, std::string &Suffix);
void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
unsigned RetNum, unsigned ParamNum, ...);
void VerifyAttrs(Attributes Attrs, const Type *Ty,
bool isReturnValue, const Value *V);
void VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
bool isReturnValue, const Value *V);
void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs,
const Value *V);
@ -437,22 +437,23 @@ void Verifier::visitGlobalAlias(GlobalAlias &GA) {
void Verifier::verifyTypeSymbolTable(TypeSymbolTable &ST) {
}
// VerifyAttrs - Check the given parameter attributes for an argument or return
// VerifyParameterAttrs - Check the given attributes for an argument or return
// value of the specified type. The value V is printed in error messages.
void Verifier::VerifyAttrs(Attributes Attrs, const Type *Ty,
bool isReturnValue, const Value *V) {
void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
bool isReturnValue, const Value *V) {
if (Attrs == Attribute::None)
return;
Attributes FnCheckAttr = Attrs & Attribute::FunctionOnly;
Assert1(!FnCheckAttr, "Attribute " + Attribute::getAsString(FnCheckAttr) +
" only applies to the function!", V);
if (isReturnValue) {
Attributes RetI = Attrs & Attribute::ParameterOnly;
Assert1(!RetI, "Attribute " + Attribute::getAsString(RetI) +
" does not apply to return values!", V);
}
Attributes FnCheckAttr = Attrs & Attribute::FunctionOnly;
Assert1(!FnCheckAttr, "Attribute " + Attribute::getAsString(FnCheckAttr) +
" only applies to functions!", V);
for (unsigned i = 0;
i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
Attributes MutI = Attrs & Attribute::MutuallyIncompatible[i];
@ -495,9 +496,9 @@ void Verifier::VerifyFunctionAttrs(const FunctionType *FT,
else if (Attr.Index-1 < FT->getNumParams())
Ty = FT->getParamType(Attr.Index-1);
else
break; // VarArgs attributes, don't verify.
VerifyAttrs(Attr.Attrs, Ty, Attr.Index == 0, V);
break; // VarArgs attributes, verified elsewhere.
VerifyParameterAttrs(Attr.Attrs, Ty, Attr.Index == 0, V);
if (Attr.Attrs & Attribute::Nest) {
Assert1(!SawNest, "More than one parameter has attribute nest!", V);
@ -509,10 +510,10 @@ void Verifier::VerifyFunctionAttrs(const FunctionType *FT,
}
Attributes FAttrs = Attrs.getFnAttributes();
Assert1(!(FAttrs & (~Attribute::FunctionOnly)),
"Attribute " + Attribute::getAsString(FAttrs) +
" does not apply to function!", V);
Attributes NotFn = FAttrs & (~Attribute::FunctionOnly);
Assert1(!NotFn, "Attribute " + Attribute::getAsString(NotFn) +
" does not apply to the function!", V);
for (unsigned i = 0;
i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
Attributes MutI = FAttrs & Attribute::MutuallyIncompatible[i];
@ -1025,7 +1026,7 @@ void Verifier::VerifyCallSite(CallSite CS) {
for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) {
Attributes Attr = Attrs.getParamAttributes(Idx);
VerifyAttrs(Attr, CS.getArgument(Idx-1)->getType(), false, I);
VerifyParameterAttrs(Attr, CS.getArgument(Idx-1)->getType(), false, I);
Attributes VArgI = Attr & Attribute::VarArgsIncompatible;
Assert1(!VArgI, "Attribute " + Attribute::getAsString(VArgI) +


@ -0,0 +1,12 @@
; RUN: llvm-as < %s | llvm-dis -disable-output
; PR4373
@foo = weak global { i32 } zeroinitializer
@bar = weak global i32 0
define void @test() {
entry:
store { i32 } zeroinitializer, { i32 }* @foo
store i32 1, i32* @bar
ret void
}


@ -1,4 +1,7 @@
; RUN: bugpoint %s -dce -bugpoint-deletecalls -simplifycfg -silence-passes %bugpoint_topts
; XFAIL: powerpc-.*-linux
; END.
; Failure on PPC Linux is due to PR4293.
@.LC0 = internal global [13 x i8] c"Hello World\0A\00" ; <[13 x i8]*> [#uses=1]


@ -0,0 +1,77 @@
; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
type { i32, i32, %struct.D_Sym**, [3 x %struct.D_Sym*] } ; type %0
type { i32, %struct.D_Reduction** } ; type %1
type { i32, %struct.D_RightEpsilonHint* } ; type %2
type { i32, %struct.D_ErrorRecoveryHint* } ; type %3
type { i32, i32, %struct.D_Reduction**, [3 x %struct.D_Reduction*] } ; type %4
%struct.D_ErrorRecoveryHint = type { i16, i16, i8* }
%struct.D_ParseNode = type { i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, i8*, i8* }
%struct.D_Parser = type { i8*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, %struct.D_Scope*, void (%struct.D_Parser*)*, %struct.D_ParseNode* (%struct.D_Parser*, i32, %struct.D_ParseNode**)*, void (%struct.D_ParseNode*)*, %struct.d_loc_t, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.D_ParserTables = type { i32, %struct.D_State*, i16*, i32, i32, %struct.D_Symbol*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, i32, %struct.D_Pass*, i32 }
%struct.D_Pass = type { i8*, i32, i32, i32 }
%struct.D_Reduction = type { i16, i16, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)*, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)*, i16, i16, i32, i32, i32, i32, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)** }
%struct.D_RightEpsilonHint = type { i16, i16, %struct.D_Reduction* }
%struct.D_Scope = type { i8, %struct.D_Sym*, %struct.D_SymHash*, %struct.D_Sym*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope* }
%struct.D_Shift = type { i16, i8, i8, i32, i32, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)* }
%struct.D_State = type { i8*, i32, %1, %2, %3, %struct.D_Shift**, i32 (i8**, i32*, i32*, i16*, i32*, i8*, i32*)*, i8*, i8, i8, i8, i8*, %struct.D_Shift***, i32 }
%struct.D_Sym = type { i8*, i32, i32, %struct.D_Sym*, %struct.D_Sym*, i32 }
%struct.D_SymHash = type { i32, i32, %0 }
%struct.D_Symbol = type { i32, i8*, i32 }
%struct.PNode = type { i32, i32, i32, i32, %struct.D_Reduction*, %struct.D_Shift*, i32, %struct.VecPNode, i32, i8, i8, %struct.PNode*, %struct.PNode*, %struct.PNode*, %struct.PNode*, i8*, i8*, %struct.D_Scope*, i8*, %struct.D_ParseNode }
%struct.PNodeHash = type { %struct.PNode**, i32, i32, i32, %struct.PNode* }
%struct.Parser = type { %struct.D_Parser, i8*, i8*, %struct.D_ParserTables*, i32, i32, i32, i32, i32, i32, i32, %struct.PNodeHash, %struct.SNodeHash, %struct.Reduction*, %struct.Shift*, %struct.D_Scope*, %struct.SNode*, i32, %struct.Reduction*, %struct.Shift*, i32, %struct.PNode*, %struct.SNode*, %struct.ZNode*, %4, %struct.ShiftResult*, %struct.D_Shift, %struct.Parser*, i8* }
%struct.Reduction = type { %struct.ZNode*, %struct.SNode*, %struct.D_Reduction*, %struct.SNode*, i32, %struct.Reduction* }
%struct.SNode = type { %struct.D_State*, %struct.D_Scope*, i8*, %struct.d_loc_t, i32, %struct.PNode*, %struct.VecZNode, i32, %struct.SNode*, %struct.SNode* }
%struct.SNodeHash = type { %struct.SNode**, i32, i32, i32, %struct.SNode*, %struct.SNode* }
%struct.Shift = type { %struct.SNode*, %struct.Shift* }
%struct.ShiftResult = type { %struct.D_Shift*, %struct.d_loc_t }
%struct.VecPNode = type { i32, i32, %struct.PNode**, [3 x %struct.PNode*] }
%struct.VecSNode = type { i32, i32, %struct.SNode**, [3 x %struct.SNode*] }
%struct.VecZNode = type { i32, i32, %struct.ZNode**, [3 x %struct.ZNode*] }
%struct.ZNode = type { %struct.PNode*, %struct.VecSNode }
%struct.d_loc_t = type { i8*, i8*, i32, i32, i32 }
declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
define fastcc i32 @exhaustive_parse(%struct.Parser* %p, i32 %state) nounwind {
entry:
store i8* undef, i8** undef, align 4
%0 = getelementptr %struct.Parser* %p, i32 0, i32 0, i32 6 ; <%struct.d_loc_t*> [#uses=1]
%1 = bitcast %struct.d_loc_t* %0 to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.i32(i8* undef, i8* %1, i32 20, i32 4)
br label %bb10
bb10: ; preds = %bb30, %bb29, %bb26, %entry
br i1 undef, label %bb18, label %bb20
bb18: ; preds = %bb10
br i1 undef, label %bb20, label %bb19
bb19: ; preds = %bb18
br label %bb20
bb20: ; preds = %bb19, %bb18, %bb10
br i1 undef, label %bb21, label %bb22
bb21: ; preds = %bb20
unreachable
bb22: ; preds = %bb20
br i1 undef, label %bb24, label %bb26
bb24: ; preds = %bb22
unreachable
bb26: ; preds = %bb22
br i1 undef, label %bb10, label %bb29
bb29: ; preds = %bb26
br i1 undef, label %bb10, label %bb30
bb30: ; preds = %bb29
br i1 undef, label %bb31, label %bb10
bb31: ; preds = %bb30
unreachable
}


@ -0,0 +1,8 @@
; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | grep {fcpys s0, s1}
define float @f(float %z, double %a, float %b) {
%tmp = call float @g(float %b)
ret float %tmp
}
declare float @g(float)


@ -1,4 +1,4 @@
; RUN: llvm-as < %s | llc -stats |& grep {40.*Number of machine instrs printed}
; RUN: llvm-as < %s | llc -stats |& grep {39.*Number of machine instrs printed}
; RUN: llvm-as < %s | llc -stats |& grep {.*Number of re-materialization}
; This test really wants to check that the resultant "cond_true" block only
; has a single store in it, and that cond_true55 only has code to materialize

test/CodeGen/ARM/stm.ll (new file)

@ -0,0 +1,13 @@
; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 -arm-pre-alloc-loadstore-opti | grep stm | count 2
@"\01LC" = internal constant [32 x i8] c"Boolean Not: %d %d %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[32 x i8]*> [#uses=1]
@"\01LC1" = internal constant [26 x i8] c"Bitwise Not: %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[26 x i8]*> [#uses=1]
declare i32 @printf(i8* nocapture, ...) nounwind
define i32 @main() nounwind {
entry:
%0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @"\01LC1", i32 0, i32 0), i32 -2, i32 -3, i32 2, i32 -6) nounwind ; <i32> [#uses=0]
%1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([32 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 1, i32 0, i32 1, i32 0, i32 1) nounwind ; <i32> [#uses=0]
ret i32 0
}


@ -0,0 +1,15 @@
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep fstpt
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep xmm
; Check that x86-64 tail calls support x86_fp80 and v2f32 types. (Tail call
; calling convention out of sync with standard C calling convention on x86_64)
; Bug 4278.
declare fastcc double @tailcallee(x86_fp80, <2 x float>)
define fastcc double @tailcall() {
entry:
%tmp = fpext float 1.000000e+00 to x86_fp80
%tmp2 = tail call fastcc double @tailcallee( x86_fp80 %tmp, <2 x float> <float 1.000000e+00, float 1.000000e+00>)
ret double %tmp2
}


@ -3,19 +3,19 @@
; Move param %in1 to temp register (%eax).
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl 40(%rsp), %eax}
; Add %in1 %p1 to another temporary register (%r9d).
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %edi, %r9d}
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {addl 32(%rsp), %r9d}
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %edi, %r10d}
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {addl 32(%rsp), %r10d}
; Move result of addition to stack.
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %r9d, 40(%rsp)}
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %r10d, 40(%rsp)}
; Move param %in2 to stack.
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %eax, 32(%rsp)}
declare fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %a, i32 %b)
declare fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %a, i32 %b)
define fastcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %in1, i32 %in2) {
define fastcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in1, i32 %in2) {
entry:
%tmp = add i32 %in1, %p1
%retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %in2,i32 %tmp)
%retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in2,i32 %tmp)
ret i32 %retval
}


@ -0,0 +1,94 @@
; RUN: llvm-as < %s | llc -f -o /dev/null
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
%llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }*, i32 }
%llvm.dbg.derivedtype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }* }
%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
%llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
%struct._objc_cache = type opaque
%struct._objc_category = type { i8*, i8*, %struct._objc_method_list*, %struct._objc_method_list*, %struct._objc_protocol_list*, i32, %struct._prop_list_t* }
%struct._objc_class = type { %struct._objc_class*, %struct._objc_class*, i8*, i32, i32, i32, %struct._objc_ivar_list*, %struct._objc_method_list*, %struct._objc_cache*, %struct._objc_protocol_list*, i8*, %struct._objc_class_extension* }
%struct._objc_class_extension = type { i32, i8*, %struct._prop_list_t* }
%struct._objc_exception_data = type { [18 x i32], [4 x i8*] }
%struct._objc_ivar = type { i8*, i8*, i32 }
%struct._objc_ivar_list = type opaque
%struct._objc_method = type { %struct.objc_selector*, i8*, i8* }
%struct._objc_method_description = type { %struct.objc_selector*, i8* }
%struct._objc_method_description_list = type { i32, [0 x %struct._objc_method_description] }
%struct._objc_method_list = type opaque
%struct._objc_module = type { i32, i32, i8*, %struct._objc_symtab* }
%struct._objc_protocol = type { %struct._objc_protocol_extension*, i8*, %struct._objc_protocol_list*, %struct._objc_method_description_list*, %struct._objc_method_description_list* }
%struct._objc_protocol_extension = type { i32, %struct._objc_method_description_list*, %struct._objc_method_description_list*, %struct._prop_list_t* }
%struct._objc_protocol_list = type { %struct._objc_protocol_list*, i32, [0 x %struct._objc_protocol] }
%struct._objc_super = type <{ %struct.objc_object*, %struct.objc_class* }>
%struct._objc_symtab = type { i32, %struct.objc_selector*, i16, i16, [0 x i8*] }
%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] }
%struct._prop_t = type { i8*, i8* }
%struct.objc_class = type opaque
%struct.objc_object = type opaque
%struct.objc_selector = type opaque
@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__OBJC, __image_info,regular" ; <[2 x i32]*> [#uses=1]
@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
@.str = internal constant [4 x i8] c"t.m\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
@.str1 = internal constant [20 x i8] c"/Volumes/work/Radar\00", section "llvm.metadata" ; <[20 x i8]*> [#uses=1]
@.str2 = internal constant [10 x i8] c"clang 1.0\00", section "llvm.metadata" ; <[10 x i8]*> [#uses=1]
@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 16, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([10 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 1 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
@.str3 = internal constant [3 x i8] c"f1\00", section "llvm.metadata" ; <[3 x i8]*> [#uses=1]
@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([3 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 3, { }* null, i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
@llvm.dbg.array = internal constant [2 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*)], section "llvm.metadata" ; <[2 x { }*]*> [#uses=1]
@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array to { }*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
@.str5 = internal constant [3 x i8] c"l0\00", section "llvm.metadata" ; <[3 x i8]*> [#uses=1]
@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([3 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 5, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1]
@.str6 = internal constant [3 x i8] c"f0\00", section "llvm.metadata" ; <[3 x i8]*> [#uses=1]
@llvm.dbg.subprogram7 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([3 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str6, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* null, i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
@.str8 = internal constant [2 x i8] c"x\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
@llvm.dbg.variable9 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*), i8* getelementptr ([2 x i8]* @.str8, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1]
@"\01L_OBJC_CLASS_NAME_" = internal global [1 x i8] zeroinitializer, section "__TEXT,__cstring,cstring_literals", align 1 ; <[1 x i8]*> [#uses=1]
@"\01L_OBJC_MODULES" = internal global %struct._objc_module { i32 7, i32 16, i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct._objc_symtab* null }, section "__OBJC,__module_info,regular,no_dead_strip", align 4 ; <%struct._objc_module*> [#uses=1]
@llvm.used = appending global [3 x i8*] [i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*)], section "llvm.metadata" ; <[3 x i8*]*> [#uses=0]
define void @f1() nounwind {
entry:
%x.addr.i = alloca i32 ; <i32*> [#uses=2]
%l0 = alloca void (i32)*, align 4 ; <void (i32)**> [#uses=2]
call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
call void @llvm.dbg.stoppoint(i32 4, i32 3, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*))
store i32 1, i32* %x.addr.i
%0 = bitcast i32* %x.addr.i to { }* ; <{ }*> [#uses=1]
call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable9 to { }*))
call void @llvm.dbg.stoppoint(i32 2, i32 66, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
call void @llvm.dbg.stoppoint(i32 5, i32 3, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*))
%1 = bitcast void (i32)** %l0 to { }* ; <{ }*> [#uses=1]
call void @llvm.dbg.declare({ }* %1, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable to { }*))
store void (i32)* @f0, void (i32)** %l0
call void @llvm.dbg.stoppoint(i32 6, i32 1, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
ret void
}
declare void @llvm.dbg.func.start({ }*) nounwind readnone
declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind readnone
define internal void @f0(i32 %x) nounwind alwaysinline {
entry:
%x.addr = alloca i32 ; <i32*> [#uses=2]
call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*))
store i32 %x, i32* %x.addr
%0 = bitcast i32* %x.addr to { }* ; <{ }*> [#uses=1]
call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable9 to { }*))
call void @llvm.dbg.stoppoint(i32 2, i32 66, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*))
ret void
}
declare void @llvm.dbg.declare({ }*, { }*) nounwind readnone
declare void @llvm.dbg.region.end({ }*) nounwind readnone


@ -0,0 +1,75 @@
; RUN: llvm-as < %s | llc
; RUN: llvm-as < %s | llc -O0
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 393233, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0) }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata" ; <[5 x i8]*> [#uses=1]
@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5627) (LLVM build)\00", section "llvm.metadata" ; <[52 x i8]*> [#uses=1]
@.str3 = internal constant [4 x i8] c"foo\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
@llvm.dbg.subprogram5 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([5 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str6, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
@.str6 = internal constant [5 x i8] c"main\00", section "llvm.metadata" ; <[5 x i8]*> [#uses=1]
define i32 @foo() nounwind alwaysinline {
entry:
%retval = alloca i32 ; <i32*> [#uses=2]
%0 = alloca i32 ; <i32*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
store i32 42, i32* %0, align 4
%1 = load i32* %0, align 4 ; <i32> [#uses=1]
store i32 %1, i32* %retval, align 4
br label %return
return: ; preds = %entry
%retval1 = load i32* %retval ; <i32> [#uses=1]
call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
ret i32 %retval1
}
declare void @llvm.dbg.func.start({ }*) nounwind
declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
declare void @llvm.dbg.region.end({ }*) nounwind
define i32 @main() nounwind {
entry:
%retval.i = alloca i32 ; <i32*> [#uses=2]
%0 = alloca i32 ; <i32*> [#uses=2]
%retval = alloca i32 ; <i32*> [#uses=2]
%1 = alloca i32 ; <i32*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram5 to { }*))
br label %bb1
return: ; preds = %entry
%retval1 = load i32* %retval ; <i32> [#uses=1]
call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram5 to { }*))
ret i32 %retval1
bb1:
call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) nounwind
call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
store i32 42, i32* %0, align 4
%2 = load i32* %0, align 4 ; <i32> [#uses=1]
store i32 %2, i32* %retval.i, align 4
%retval1.i = load i32* %retval.i ; <i32> [#uses=1]
call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) nounwind
store i32 %retval1.i, i32* %1, align 4
%3 = load i32* %1, align 4 ; <i32> [#uses=1]
store i32 %3, i32* %retval, align 4
br label %return
}

test/FrontendC/pr3518.c (new file)

@ -0,0 +1,24 @@
// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep {= internal global} | count 4
// PR 3518
// Some of the objects were coming out as uninitialized (external) before 3518
// was fixed. Internal names are different between llvm-gcc and clang so they
// are not tested.
extern void abort (void);
struct A { int i; int j; };
struct B { struct A *a; struct A *b; };
struct C { struct B *c; struct A *d; };
struct C e = { &(struct B) { &(struct A) { 1, 2 }, &(struct A) { 3, 4 } }, &(struct A) { 5, 6 } };
int
main (void)
{
if (e.c->a->i != 1 || e.c->a->j != 2)
abort ();
if (e.c->b->i != 3 || e.c->b->j != 4)
abort ();
if (e.d->i != 5 || e.d->j != 6)
abort ();
return 0;
}

test/FrontendC/pr4349.c (new file)

@ -0,0 +1,39 @@
// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep svars2 | grep {\\\[2 x \\\[2 x i8\\\]\\\]}
// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep svars2 | grep {, i\[\[:digit:\]\]\\+ 1)} | count 1
// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep svars3 | grep {\\\[2 x i16\\\]}
// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep svars3 | grep {, i\[\[:digit:\]\]\\+ 1)} | count 1
// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep svars4 | grep {\\\[2 x \\\[2 x i8\\\]\\\]} | count 1
// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep svars4 | grep {, i\[\[:digit:\]\]\\+ 1, i\[\[:digit:\]\]\\+ 1)} | count 1
// PR 4349
union reg
{
unsigned char b[2][2];
unsigned short w[2];
unsigned int d;
};
struct cpu
{
union reg pc;
};
extern struct cpu cpu;
struct svar
{
void *ptr;
};
struct svar svars1[] =
{
{ &((cpu.pc).w[0]) }
};
struct svar svars2[] =
{
{ &((cpu.pc).b[0][1]) }
};
struct svar svars3[] =
{
{ &((cpu.pc).w[1]) }
};
struct svar svars4[] =
{
{ &((cpu.pc).b[1][1]) }
};

test/TableGen/Slice.td (new file)

@ -0,0 +1,87 @@
// RUN: tblgen %s | grep {\\\[(set} | count 2
// RUN: tblgen %s | grep {\\\[\\\]} | count 2
class ValueType<int size, int value> {
int Size = size;
int Value = value;
}
def f32 : ValueType<32, 1>; // 2 x i64 vector value
class Intrinsic<string name> {
string Name = name;
}
class Inst<bits<8> opcode, dag oopnds, dag iopnds, string asmstr,
list<dag> pattern> {
bits<8> Opcode = opcode;
dag OutOperands = oopnds;
dag InOperands = iopnds;
string AssemblyString = asmstr;
list<dag> Pattern = pattern;
}
def ops;
def outs;
def ins;
def set;
// Define registers
class Register<string n> {
string Name = n;
}
class RegisterClass<list<ValueType> regTypes, list<Register> regList> {
list<ValueType> RegTypes = regTypes;
list<Register> MemberList = regList;
}
def XMM0: Register<"xmm0">;
def XMM1: Register<"xmm1">;
def XMM2: Register<"xmm2">;
def XMM3: Register<"xmm3">;
def XMM4: Register<"xmm4">;
def XMM5: Register<"xmm5">;
def XMM6: Register<"xmm6">;
def XMM7: Register<"xmm7">;
def XMM8: Register<"xmm8">;
def XMM9: Register<"xmm9">;
def XMM10: Register<"xmm10">;
def XMM11: Register<"xmm11">;
def XMM12: Register<"xmm12">;
def XMM13: Register<"xmm13">;
def XMM14: Register<"xmm14">;
def XMM15: Register<"xmm15">;
def FR32 : RegisterClass<[f32],
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]>;
class SDNode {}
def not : SDNode;
multiclass scalar<bits<8> opcode, string asmstr = "", list<list<dag>> patterns = []> {
def SSrr : Inst<opcode, (outs FR32:$dst), (ins FR32:$src),
!strconcat(asmstr, "\t$dst, $src"),
!if(!null(patterns),[]<dag>,patterns[0])>;
def SSrm : Inst<opcode, (outs FR32:$dst), (ins FR32:$src),
!strconcat(asmstr, "\t$dst, $src"),
!if(!null(patterns),[]<dag>,!if(!null(!cdr(patterns)),patterns[0],patterns[1]))>;
}
multiclass vscalar<bits<8> opcode, string asmstr = "", list<list<dag>> patterns = []> {
def V#NAME#SSrr : Inst<opcode, (outs FR32:$dst), (ins FR32:$src),
!strconcat(asmstr, "\t$dst, $src"),
!if(!null(patterns),[]<dag>,patterns[0])>;
def V#NAME#SSrm : Inst<opcode, (outs FR32:$dst), (ins FR32:$src),
!strconcat(asmstr, "\t$dst, $src"),
!if(!null(patterns),[]<dag>,!if(!null(!cdr(patterns)),patterns[0],patterns[1]))>;
}
multiclass myscalar<bits<8> opcode, string asmstr = "", list<list<dag>> patterns = []> :
scalar<opcode, asmstr, patterns>,
vscalar<opcode, asmstr, patterns>;
defm NOT : myscalar<0x10, "not", [[], [(set FR32:$dst, (f32 (not FR32:$src)))]]>;


@ -1,5 +1,5 @@
// RUN: tblgen %s | grep {1, 2, 3} | count 4
// RUN: tblgen %s | grep {4, 5, 6} | count 2
// RUN: tblgen %s | grep {\\\[1, 2, 3\\\]} | count 4
// RUN: tblgen %s | grep {\\\[4, 5, 6\\\]} | count 2
class A<list<list<int>> vals> {
list<int> first = vals[0];


@ -1,4 +1,4 @@
; RUN: llvm-as < %s | opt -indvars | llvm-dis | %prcontext Loop: 1 | grep %indvar
; RUN: llvm-as < %s | opt -indvars | llvm-dis | %prcontext ^Loop: 1 | grep %Canonical
; The indvar simplification code should ensure that the first PHI in the block
; is the canonical one!


@ -0,0 +1,24 @@
; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep trunc | count 1
; Indvars should do the IV arithmetic in the canonical IV type (i64),
; and only use one truncation.
define void @foo(i64* %A, i64* %B, i64 %n, i64 %a, i64 %s) nounwind {
entry:
%t0 = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
br i1 %t0, label %bb.preheader, label %return
bb.preheader: ; preds = %entry
br label %bb
bb: ; preds = %bb, %bb.preheader
%i.01 = phi i64 [ %t6, %bb ], [ %a, %bb.preheader ] ; <i64> [#uses=3]
%t1 = and i64 %i.01, 255 ; <i64> [#uses=1]
%t2 = getelementptr i64* %A, i64 %t1 ; <i64*> [#uses=1]
store i64 %i.01, i64* %t2, align 8
%t6 = add i64 %i.01, %s ; <i64> [#uses=1]
br label %bb
return: ; preds = %entry
ret void
}


@ -0,0 +1,7 @@
; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep store
; PR4366
define void @a() {
store i32 0, i32 addrspace(1)* null
ret void
}

Some files were not shown because too many files have changed in this diff.