Vendor import of llvm trunk r162107:

http://llvm.org/svn/llvm-project/llvm/trunk@162107
This commit is contained in:
Dimitry Andric 2012-08-19 10:31:50 +00:00
parent 58b69754af
commit 902a7b5298
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/vendor/llvm/dist/; revision=239390
svn path=/vendor/llvm/llvm-trunk-r162107/; revision=239391; tag=vendor/llvm/llvm-trunk-r162107
96 changed files with 3312 additions and 1332 deletions

View File

@ -128,10 +128,15 @@ if( LLVM_TARGETS_TO_BUILD STREQUAL "all" )
set( LLVM_TARGETS_TO_BUILD ${LLVM_ALL_TARGETS} )
endif()
set(LLVM_TARGETS_TO_BUILD
${LLVM_TARGETS_TO_BUILD}
${LLVM_EXPERIMENTAL_TARGETS_TO_BUILD})
set(LLVM_ENUM_TARGETS "")
foreach(c ${LLVM_TARGETS_TO_BUILD})
list(FIND LLVM_ALL_TARGETS ${c} idx)
if( idx LESS 0 )
list(FIND LLVM_EXPERIMENTAL_TARGETS_TO_BUILD ${c} idy)
if( idx LESS 0 AND idy LESS 0 )
message(FATAL_ERROR "The target `${c}' does not exist.
It should be one of\n${LLVM_ALL_TARGETS}")
else()
@ -139,11 +144,6 @@ foreach(c ${LLVM_TARGETS_TO_BUILD})
endif()
endforeach(c)
set(LLVM_TARGETS_TO_BUILD
${LLVM_TARGETS_TO_BUILD}
${LLVM_EXPERIMENTAL_TARGETS_TO_BUILD}
)
set(llvm_builded_incs_dir ${LLVM_BINARY_DIR}/include/llvm)
include(AddLLVMDefinitions)

View File

@ -244,13 +244,13 @@ build-for-llvm-top:
SVN = svn
SVN-UPDATE-OPTIONS =
AWK = awk
SUB-SVN-DIRS = $(AWK) '/\?\ \ \ \ \ \ / {print $$2}' \
SUB-SVN-DIRS = $(AWK) '/I|\? / {print $$2}' \
| LC_ALL=C xargs $(SVN) info 2>/dev/null \
| $(AWK) '/^Path:\ / {print $$2}'
update:
$(SVN) $(SVN-UPDATE-OPTIONS) update $(LLVM_SRC_ROOT)
@ $(SVN) status $(LLVM_SRC_ROOT) | $(SUB-SVN-DIRS) | xargs $(SVN) $(SVN-UPDATE-OPTIONS) update
@ $(SVN) status --no-ignore $(LLVM_SRC_ROOT) | $(SUB-SVN-DIRS) | xargs $(SVN) $(SVN-UPDATE-OPTIONS) update
happiness: update all check-all

View File

@ -258,6 +258,11 @@ ENABLE_WERROR = @ENABLE_WERROR@
#DEBUG_SYMBOLS = 1
@DEBUG_SYMBOLS@
# When KEEP_SYMBOLS is enabled, installed executables will never have their
# symbols stripped.
#KEEP_SYMBOLS = 1
@KEEP_SYMBOLS@
# The compiler flags to use for optimized builds.
OPTIMIZE_OPTION := @OPTIMIZE_OPTION@

View File

@ -542,6 +542,15 @@ else
AC_SUBST(DEBUG_SYMBOLS,[[DEBUG_SYMBOLS=1]])
fi
dnl --enable-keep-symbols : do not strip installed executables
AC_ARG_ENABLE(keep-symbols,
AS_HELP_STRING(--enable-keep-symbols,[Do not strip installed executables]),,enableval=no)
if test ${enableval} = "no" ; then
AC_SUBST(KEEP_SYMBOLS,[[]])
else
AC_SUBST(KEEP_SYMBOLS,[[KEEP_SYMBOLS=1]])
fi
dnl --enable-jit: check whether they want to enable the jit
AC_ARG_ENABLE(jit,
AS_HELP_STRING(--enable-jit,

24
configure vendored
View File

@ -693,6 +693,7 @@ ENABLE_EXPENSIVE_CHECKS
EXPENSIVE_CHECKS
DEBUG_RUNTIME
DEBUG_SYMBOLS
KEEP_SYMBOLS
JIT
TARGET_HAS_JIT
ENABLE_DOCS
@ -1408,6 +1409,7 @@ Optional Features:
NO)
--enable-debug-symbols Build compiler with debug symbols (default is NO if
optimization is on and YES if it's off)
  --enable-keep-symbols   Do not strip installed executables
--enable-jit Enable Just In Time Compiling (default is YES)
--enable-docs Build documents (default is YES)
--enable-doxygen Build doxygen documentation (default is NO)
@ -5158,6 +5160,21 @@ else
fi
# Check whether --enable-keep-symbols was given.
if test "${enable_keep_symbols+set}" = set; then
enableval=$enable_keep_symbols;
else
enableval=no
fi
if test ${enableval} = "no" ; then
KEEP_SYMBOLS=
else
KEEP_SYMBOLS=KEEP_SYMBOLS=1
fi
# Check whether --enable-jit was given.
if test "${enable_jit+set}" = set; then
enableval=$enable_jit;
@ -10272,7 +10289,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
#line 10275 "configure"
#line 10292 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@ -22150,12 +22167,12 @@ ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim
EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim
DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim
DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim
KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim
JIT!$JIT$ac_delim
TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim
ENABLE_DOCS!$ENABLE_DOCS$ac_delim
ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
LLVM_ENABLE_THREADS!$LLVM_ENABLE_THREADS$ac_delim
ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@ -22197,6 +22214,7 @@ _ACEOF
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
ENABLE_PIC!$ENABLE_PIC$ac_delim
ENABLE_SHARED!$ENABLE_SHARED$ac_delim
ENABLE_EMBED_STDCXX!$ENABLE_EMBED_STDCXX$ac_delim
@ -22293,7 +22311,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 94; then
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5

View File

@ -687,8 +687,7 @@ class SmallDenseMap
/// A "union" of an inline bucket array and the struct representing
/// a large bucket. This union will be discriminated by the 'Small' bit.
typename AlignedCharArray<BucketT[InlineBuckets], LargeRep>::union_type
storage;
AlignedCharArrayUnion<BucketT[InlineBuckets], LargeRep> storage;
public:
explicit SmallDenseMap(unsigned NumInitBuckets = 0) {
@ -834,8 +833,7 @@ class SmallDenseMap
return; // Nothing to do.
// First move the inline buckets into a temporary storage.
typename AlignedCharArray<BucketT[InlineBuckets]>::union_type
TmpStorage;
AlignedCharArrayUnion<BucketT[InlineBuckets]> TmpStorage;
BucketT *TmpBegin = reinterpret_cast<BucketT *>(TmpStorage.buffer);
BucketT *TmpEnd = TmpBegin;

View File

@ -206,7 +206,7 @@ struct VariadicFunction2 {
ResultT operator()(Param0T P0, Param1T P1, \
LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \
const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \
return Func(P0, P1, makeAraryRef(Args)); \
return Func(P0, P1, makeArrayRef(Args)); \
}
LLVM_DEFINE_OVERLOAD(1)
LLVM_DEFINE_OVERLOAD(2)

View File

@ -122,6 +122,7 @@ class BranchProbabilityInfo : public FunctionPass {
bool calcLoopBranchHeuristics(BasicBlock *BB);
bool calcZeroHeuristics(BasicBlock *BB);
bool calcFloatingPointHeuristics(BasicBlock *BB);
bool calcInvokeHeuristics(BasicBlock *BB);
};
}

View File

@ -705,7 +705,20 @@ DominatorTreeBase<NodeT>::properlyDominates(const NodeT *A, const NodeT *B) {
EXTERN_TEMPLATE_INSTANTIATION(class DominatorTreeBase<BasicBlock>);
class BasicBlockEdge;
/// BasicBlockEdge - A directed CFG edge, stored as its (Start, End) basic
/// block pair. isSingleEdge() (defined out of line) reports whether this
/// pair identifies a unique edge — presumably false when several CFG edges
/// connect the same two blocks; confirm against the implementation.
class BasicBlockEdge {
const BasicBlock *Start; // Source block of the edge.
const BasicBlock *End; // Destination block of the edge.
public:
BasicBlockEdge(const BasicBlock *Start_, const BasicBlock *End_) :
Start(Start_), End(End_) { }
const BasicBlock *getStart() const {
return Start;
}
const BasicBlock *getEnd() const {
return End;
}
// True if (Start, End) denotes exactly one edge in the CFG.
bool isSingleEdge() const;
};
//===-------------------------------------
/// DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to

View File

@ -420,6 +420,12 @@ class MachineInstr : public ilist_node<MachineInstr> {
return hasProperty(MCID::Bitcast, Type);
}
/// isSelect - Return true if this instruction is a select instruction.
///
bool isSelect(QueryType Type = IgnoreBundle) const {
return hasProperty(MCID::Select, Type);
}
/// isNotDuplicable - Return true if this instruction cannot be safely
/// duplicated. For example, if the instruction has a unique labels attached
/// to it, duplicating it would cause multiple definition errors.

View File

@ -146,7 +146,8 @@ class SDValue {
inline bool isMachineOpcode() const;
inline unsigned getMachineOpcode() const;
inline const DebugLoc getDebugLoc() const;
inline void dump() const;
inline void dumpr() const;
/// reachesChainWithoutSideEffects - Return true if this operand (which must
/// be a chain) reaches the specified operand without crossing any
@ -806,7 +807,12 @@ inline bool SDValue::hasOneUse() const {
inline const DebugLoc SDValue::getDebugLoc() const {
return Node->getDebugLoc();
}
inline void SDValue::dump() const {
return Node->dump();
}
inline void SDValue::dumpr() const {
return Node->dumpr();
}
// Define inline functions from the SDUse class.
inline void SDUse::set(const SDValue &V) {

View File

@ -15,7 +15,7 @@
//
// All Hexagon intrinsics start with "llvm.hexagon.".
let TargetPrefix = "hexagon" in {
/// Hexagon_Intrinsic - Base class for all altivec intrinsics.
/// Hexagon_Intrinsic - Base class for all Hexagon intrinsics.
class Hexagon_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
list<LLVMType> param_types,
list<IntrinsicProperty> properties>

View File

@ -0,0 +1,32 @@
//===-- llvm/MC/MCFixedLenDisassembler.h - Decoder driver -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Fixed length disassembler decoder state machine driver.
//===----------------------------------------------------------------------===//
#ifndef MCFIXEDLENDISASSEMBLER_H
#define MCFIXEDLENDISASSEMBLER_H
namespace llvm {
namespace MCD {
// Disassembler state machine opcodes. Each comment documents the operand
// bytes the table-driven decoder expects to follow the opcode.
enum DecoderOps {
  OPC_ExtractField = 1, // OPC_ExtractField(uint8_t Start, uint8_t Len)
  OPC_FilterValue,      // OPC_FilterValue(uleb128 Val, uint16_t NumToSkip)
  OPC_CheckField,       // OPC_CheckField(uint8_t Start, uint8_t Len,
                        //                uleb128 Val, uint16_t NumToSkip)
  OPC_CheckPredicate,   // OPC_CheckPredicate(uleb128 PIdx, uint16_t NumToSkip)
  OPC_Decode,           // OPC_Decode(uleb128 Opcode, uleb128 DIdx)
  OPC_SoftFail,         // OPC_SoftFail(uleb128 PMask, uleb128 NMask)
  OPC_Fail              // OPC_Fail()
};
} // namespace MCD
} // namespace llvm
#endif

View File

@ -107,6 +107,7 @@ namespace MCID {
Compare,
MoveImm,
Bitcast,
Select,
DelaySlot,
FoldableAsLoad,
MayLoad,
@ -282,6 +283,12 @@ class MCInstrDesc {
return Flags & (1 << MCID::Bitcast);
}
/// isSelect - Return true if this is a select instruction.
///
bool isSelect() const {
return Flags & (1 << MCID::Select);
}
/// isNotDuplicable - Return true if this instruction cannot be safely
/// duplicated. For example, if the instruction has a unique labels attached
/// to it, duplicating it would cause multiple definition errors.

View File

@ -107,8 +107,8 @@ LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8192);
// Any larger and MSVC complains.
#undef LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT
/// \brief This class template exposes a typedef for type containing a suitable
/// aligned character array to hold elements of any of up to four types.
/// \brief This union template exposes a suitably aligned and sized character
/// array member which can hold elements of any of up to four types.
///
/// These types may be arrays, structs, or any other types. The goal is to
/// produce a union type containing a character array which, when used, forms
@ -116,7 +116,8 @@ LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8192);
/// than four types can be added at the cost of more boiler plate.
template <typename T1,
typename T2 = char, typename T3 = char, typename T4 = char>
class AlignedCharArray {
union AlignedCharArrayUnion {
private:
class AlignerImpl {
T1 t1; T2 t2; T3 t3; T4 t4;
@ -127,6 +128,12 @@ class AlignedCharArray {
};
public:
/// \brief The character array buffer for use by clients.
///
  /// No other member of this union should be referenced. They exist purely to
/// constrain the layout of this character array.
char buffer[sizeof(SizerImpl)];
// Sadly, Clang and GCC both fail to align a character array properly even
// with an explicit alignment attribute. To work around this, we union
// the character array that will actually be used with a struct that contains
@ -134,16 +141,10 @@ class AlignedCharArray {
// and GCC will properly register the alignment of a struct containing an
// aligned member, and this alignment should carry over to the character
// array in the union.
union union_type {
// This is the only member of the union which should be used by clients:
char buffer[sizeof(SizerImpl)];
// This member of the union only exists to force the alignment.
struct {
typename llvm::AlignedCharArrayImpl<AlignOf<AlignerImpl>::Alignment>::type
nonce_inner_member;
} nonce_member;
};
struct {
typename llvm::AlignedCharArrayImpl<AlignOf<AlignerImpl>::Alignment>::type
nonce_inner_member;
} nonce_member;
};
} // end namespace llvm

View File

@ -50,7 +50,7 @@ namespace COFF {
};
enum MachineTypes {
MT_Invalid = -1,
MT_Invalid = 0xffff,
IMAGE_FILE_MACHINE_UNKNOWN = 0x0,
IMAGE_FILE_MACHINE_AM33 = 0x13,
@ -142,7 +142,7 @@ namespace COFF {
/// Storage class tells where and what the symbol represents
enum SymbolStorageClass {
SSC_Invalid = -1,
SSC_Invalid = 0xff,
IMAGE_SYM_CLASS_END_OF_FUNCTION = -1, ///< Physical end of function
IMAGE_SYM_CLASS_NULL = 0, ///< No symbol
@ -220,7 +220,7 @@ namespace COFF {
};
enum SectionCharacteristics {
SC_Invalid = -1,
SC_Invalid = 0xffffffff,
IMAGE_SCN_TYPE_NO_PAD = 0x00000008,
IMAGE_SCN_CNT_CODE = 0x00000020,

View File

@ -38,6 +38,25 @@
#define llvm_move(value) (value)
#endif
/// LLVM_DELETED_FUNCTION - Expands to = delete if the compiler supports it.
/// Use to mark functions as uncallable. Member functions with this should
/// be declared private so that some behavior is kept in C++03 mode.
///
/// class DontCopy {
/// private:
/// DontCopy(const DontCopy&) LLVM_DELETED_FUNCTION;
/// DontCopy &operator =(const DontCopy&) LLVM_DELETED_FUNCTION;
/// public:
/// ...
/// };
#if (__has_feature(cxx_deleted_functions) \
|| defined(__GXX_EXPERIMENTAL_CXX0X__))
// No version of MSVC currently supports this.
#define LLVM_DELETED_FUNCTION = delete
#else
#define LLVM_DELETED_FUNCTION
#endif
/// LLVM_LIBRARY_VISIBILITY - If a class marked with this attribute is linked
/// into a shared library, then the class should be private to the library and
/// not accessible from outside it. Can also be used to mark variables and

View File

@ -28,6 +28,7 @@
#define LLVM_SUPPORT_FILE_SYSTEM_H
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/DataTypes.h"
@ -576,6 +577,82 @@ error_code FindLibrary(const Twine &short_name, SmallVectorImpl<char> &result);
error_code GetMainExecutable(const char *argv0, void *MainAddr,
SmallVectorImpl<char> &result);
/// This class represents a memory mapped file. It is based on
/// boost::iostreams::mapped_file.
class mapped_file_region {
mapped_file_region() LLVM_DELETED_FUNCTION;
mapped_file_region(mapped_file_region&) LLVM_DELETED_FUNCTION;
mapped_file_region &operator =(mapped_file_region&) LLVM_DELETED_FUNCTION;
public:
enum mapmode {
readonly, //< May only access map via const_data as read only.
readwrite, //< May access map via data and modify it. Written to path.
priv //< May modify via data, but changes are lost on destruction.
};
private:
/// Platform specific mapping state.
mapmode Mode;
uint64_t Size;
void *Mapping;
#if LLVM_ON_WIN32
int FileDescriptor;
void *FileHandle;
void *FileMappingHandle;
#endif
error_code init(int FD, uint64_t Offset);
public:
typedef char char_type;
#if LLVM_USE_RVALUE_REFERENCES
mapped_file_region(mapped_file_region&&);
mapped_file_region &operator =(mapped_file_region&&);
#endif
/// Construct a mapped_file_region at \a path starting at \a offset of length
/// \a length and with access \a mode.
///
/// \param path Path to the file to map. If it does not exist it will be
/// created.
/// \param mode How to map the memory.
/// \param length Number of bytes to map in starting at \a offset. If the file
/// is shorter than this, it will be extended. If \a length is
/// 0, the entire file will be mapped.
/// \param offset Byte offset from the beginning of the file where the map
/// should begin. Must be a multiple of
/// mapped_file_region::alignment().
/// \param ec This is set to errc::success if the map was constructed
  ///           successfully. Otherwise it is set to a platform dependent error.
mapped_file_region(const Twine &path,
mapmode mode,
uint64_t length,
uint64_t offset,
error_code &ec);
/// \param fd An open file descriptor to map. mapped_file_region takes
  ///           ownership. It must have been opened in the correct mode.
mapped_file_region(int fd,
mapmode mode,
uint64_t length,
uint64_t offset,
error_code &ec);
~mapped_file_region();
mapmode flags() const;
uint64_t size() const;
char *data() const;
  /// Get a const view of the data. Modifying this memory has undefined
  /// behavior.
const char *const_data() const;
/// \returns The minimum alignment offset must be.
static int alignment();
};
/// @brief Memory maps the contents of a file
///

View File

@ -19,7 +19,7 @@
namespace llvm {
/// Utility function to encode a SLEB128 value.
/// Utility function to encode a SLEB128 value to an output stream.
static inline void encodeSLEB128(int64_t Value, raw_ostream &OS) {
bool More;
do {
@ -34,7 +34,7 @@ static inline void encodeSLEB128(int64_t Value, raw_ostream &OS) {
} while (More);
}
/// Utility function to encode a ULEB128 value.
/// Utility function to encode a ULEB128 value to an output stream.
static inline void encodeULEB128(uint64_t Value, raw_ostream &OS,
unsigned Padding = 0) {
do {
@ -53,6 +53,43 @@ static inline void encodeULEB128(uint64_t Value, raw_ostream &OS,
}
}
/// Utility function to encode a ULEB128 value into a raw byte buffer.
/// Returns the number of bytes written, including any requested padding.
static inline unsigned encodeULEB128(uint64_t Value, uint8_t *p,
                                     unsigned Padding = 0) {
  uint8_t *const Begin = p;
  for (;;) {
    uint8_t Chunk = static_cast<uint8_t>(Value & 0x7f);
    Value >>= 7;
    // Set the continuation bit when more data (or padding) follows.
    if (Value != 0 || Padding != 0)
      Chunk |= 0x80;
    *p++ = Chunk;
    if (Value == 0)
      break;
  }
  // Emit the requested padding: continuation bytes followed by a terminator.
  if (Padding != 0) {
    while (Padding-- > 1)
      *p++ = '\x80';
    *p++ = '\x00';
  }
  return static_cast<unsigned>(p - Begin);
}
/// Utility function to decode a ULEB128 value.
///
/// \param p Pointer to the first byte of the encoded value.
/// \param n If non-null, receives the number of bytes consumed.
/// \returns The decoded 64-bit value.
static inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = 0) {
  const uint8_t *orig_p = p;
  uint64_t Value = 0;
  unsigned Shift = 0;
  do {
    // Widen before shifting: (*p & 0x7f) has type int, so shifting it by 28
    // or more bits overflows and silently drops the high bits of values that
    // need five or more encoded bytes.
    Value += uint64_t(*p & 0x7f) << Shift;
    Shift += 7;
  } while (*p++ >= 128);
  if (n)
    *n = (unsigned)(p - orig_p);
  return Value;
}
} // namespace llvm
#endif // LLVM_SYSTEM_LEB128_H

View File

@ -181,6 +181,12 @@ class NoFolder {
ArrayRef<Constant *> IdxList) const {
return ConstantExpr::getGetElementPtr(C, IdxList);
}
Constant *CreateGetElementPtr(Constant *C, Constant *Idx) const {
// This form of the function only exists to avoid ambiguous overload
// warnings about whether to convert Idx to ArrayRef<Constant *> or
// ArrayRef<Value *>.
return ConstantExpr::getGetElementPtr(C, Idx);
}
Instruction *CreateGetElementPtr(Constant *C,
ArrayRef<Value *> IdxList) const {
return GetElementPtrInst::Create(C, IdxList);
@ -190,6 +196,12 @@ class NoFolder {
ArrayRef<Constant *> IdxList) const {
return ConstantExpr::getInBoundsGetElementPtr(C, IdxList);
}
Constant *CreateInBoundsGetElementPtr(Constant *C, Constant *Idx) const {
// This form of the function only exists to avoid ambiguous overload
// warnings about whether to convert Idx to ArrayRef<Constant *> or
// ArrayRef<Value *>.
return ConstantExpr::getInBoundsGetElementPtr(C, Idx);
}
Instruction *CreateInBoundsGetElementPtr(Constant *C,
ArrayRef<Value *> IdxList) const {
return GetElementPtrInst::CreateInBounds(C, IdxList);

View File

@ -28,6 +28,24 @@ class SubRegIndex<list<SubRegIndex> comps = []> {
// ComposedOf - A list of two SubRegIndex instances, [A, B].
// This indicates that this SubRegIndex is the result of composing A and B.
list<SubRegIndex> ComposedOf = comps;
// CoveringSubRegIndices - A list of two or more sub-register indexes that
// cover this sub-register.
//
// This field should normally be left blank as TableGen can infer it.
//
// TableGen automatically detects sub-registers that straddle the registers
// in the SubRegs field of a Register definition. For example:
//
// Q0 = dsub_0 -> D0, dsub_1 -> D1
// Q1 = dsub_0 -> D2, dsub_1 -> D3
// D1_D2 = dsub_0 -> D1, dsub_1 -> D2
// QQ0 = qsub_0 -> Q0, qsub_1 -> Q1
//
// TableGen will infer that D1_D2 is a sub-register of QQ0. It will be given
// the synthetic index dsub_1_dsub_2 unless some SubRegIndex is defined with
// CoveringSubRegIndices = [dsub_1, dsub_2].
list<SubRegIndex> CoveringSubRegIndices = [];
}
// RegAltNameIndex - The alternate name set to use for register operands of
@ -321,6 +339,7 @@ class Instruction {
bit isCompare = 0; // Is this instruction a comparison instruction?
bit isMoveImm = 0; // Is this instruction a move immediate instruction?
bit isBitcast = 0; // Is this instruction a bitcast instruction?
bit isSelect = 0; // Is this instruction a select instruction?
bit isBarrier = 0; // Can control flow fall through this instruction?
bit isCall = 0; // Is this instruction a call instruction?
bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand?

View File

@ -413,6 +413,51 @@ class TargetInstrInfo : public MCInstrInfo {
llvm_unreachable("Target didn't implement TargetInstrInfo::insertSelect!");
}
/// analyzeSelect - Analyze the given select instruction, returning true if
/// it cannot be understood. It is assumed that MI->isSelect() is true.
///
/// When successful, return the controlling condition and the operands that
/// determine the true and false result values.
///
/// Result = SELECT Cond, TrueOp, FalseOp
///
/// Some targets can optimize select instructions, for example by predicating
/// the instruction defining one of the operands. Such targets should set
/// Optimizable.
///
/// @param MI Select instruction to analyze.
/// @param Cond Condition controlling the select.
/// @param TrueOp Operand number of the value selected when Cond is true.
/// @param FalseOp Operand number of the value selected when Cond is false.
/// @param Optimizable Returned as true if MI is optimizable.
/// @returns False on success.
virtual bool analyzeSelect(const MachineInstr *MI,
SmallVectorImpl<MachineOperand> &Cond,
unsigned &TrueOp, unsigned &FalseOp,
bool &Optimizable) const {
assert(MI && MI->isSelect() && "MI must be a select instruction");
return true;
}
/// optimizeSelect - Given a select instruction that was understood by
/// analyzeSelect and returned Optimizable = true, attempt to optimize MI by
/// merging it with one of its operands. Returns NULL on failure.
///
/// When successful, returns the new select instruction. The client is
/// responsible for deleting MI.
///
/// If both sides of the select can be optimized, PreferFalse is used to pick
/// a side.
///
/// @param MI Optimizable select instruction.
/// @param PreferFalse Try to optimize FalseOp instead of TrueOp.
/// @returns Optimized instruction or NULL.
virtual MachineInstr *optimizeSelect(MachineInstr *MI,
bool PreferFalse = false) const {
// This function must be implemented if Optimizable is ever set.
llvm_unreachable("Target must implement TargetInstrInfo::optimizeSelect!");
}
/// copyPhysReg - Emit instructions to copy a pair of physical registers.
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,

View File

@ -1,4 +1,4 @@
//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -*- C++ -*-===//
//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -----------===//
//
// The LLVM Compiler Infrastructure
//
@ -78,6 +78,19 @@ static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
static const uint32_t FPH_TAKEN_WEIGHT = 20;
static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
/// \brief Invoke-terminating normal branch taken weight
///
/// This is the weight for branching to the normal destination of an invoke
/// instruction. We expect this to happen most of the time. Set the weight to an
/// absurdly high value so that nested loops subsume it.
static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
/// \brief Invoke-terminating normal branch not-taken weight.
///
/// This is the weight for branching to the unwind destination of an invoke
/// instruction. This is essentially never taken.
static const uint32_t IH_NONTAKEN_WEIGHT = 1;
// Standard weight value. Used when none of the heuristics set weight for
// the edge.
static const uint32_t NORMAL_WEIGHT = 16;
@ -371,6 +384,19 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) {
return true;
}
/// calcInvokeHeuristics - Weight the edges leaving an invoke terminator.
///
/// Returns false (sets no weights) unless BB ends in an InvokeInst.
/// Otherwise the normal destination gets IH_TAKEN_WEIGHT and the unwind
/// destination gets IH_NONTAKEN_WEIGHT, and true is returned.
bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) {
InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator());
if (!II)
return false; // Not invoke-terminated; heuristic does not apply.
BasicBlock *Normal = II->getNormalDest();
BasicBlock *Unwind = II->getUnwindDest();
// Per the IH_* weight definitions above: the normal destination is taken
// essentially always, the unwind destination essentially never.
setEdgeWeight(BB, Normal, IH_TAKEN_WEIGHT);
setEdgeWeight(BB, Unwind, IH_NONTAKEN_WEIGHT);
return true;
}
void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LoopInfo>();
AU.setPreservesAll();
@ -397,7 +423,9 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
continue;
if (calcZeroHeuristics(*I))
continue;
calcFloatingPointHeuristics(*I);
if (calcFloatingPointHeuristics(*I))
continue;
calcInvokeHeuristics(*I);
}
PostDominatedByUnreachable.clear();

View File

@ -473,6 +473,10 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
}
SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
// Ignore self-referencing GEPs, they can occur in unreachable code.
if (&GEP == GEP.getPointerOperand())
return unknown();
SizeOffsetType PtrData = compute(GEP.getPointerOperand());
if (!bothKnown(PtrData) || !GEP.hasAllConstantIndices())
return unknown();

View File

@ -681,10 +681,10 @@ void
MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const MCInstrDesc &MCID = MI->getDesc();
const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
// The first MCID.NumDefs operands must be explicit register defines
if (MONum < MCID.getNumDefs()) {
const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
if (!MO->isReg())
report("Explicit definition must be a register", MO, MONum);
else if (!MO->isDef() && !MCOI.isOptionalDef())
@ -692,6 +692,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
else if (MO->isImplicit())
report("Explicit definition marked as implicit", MO, MONum);
} else if (MONum < MCID.getNumOperands()) {
const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
// Don't check if it's the last operand in a variadic instruction. See,
// e.g., LDM_RET in the arm back end.
if (MO->isReg() &&

View File

@ -79,6 +79,7 @@ STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
STATISTIC(NumLoadFold, "Number of loads folded");
STATISTIC(NumSelects, "Number of selects optimized");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@ -109,6 +110,7 @@ namespace {
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
bool optimizeSelect(MachineInstr *MI);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
@ -386,6 +388,23 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
return false;
}
/// Optimize a select instruction.
///
/// Asks the target (TII) to analyze MI, which must be a select. If the
/// target marks it Optimizable and then succeeds in optimizeSelect(), the
/// original instruction is erased and true is returned; otherwise false is
/// returned and nothing is changed.
bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
unsigned TrueOp = 0;
unsigned FalseOp = 0;
bool Optimizable = false;
SmallVector<MachineOperand, 4> Cond;
// analyzeSelect returns true when it cannot understand the instruction.
if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
return false;
if (!Optimizable)
return false;
// optimizeSelect returns the new instruction, or NULL on failure.
if (!TII->optimizeSelect(MI))
return false;
// The target produced a replacement; the original select is now dead.
MI->eraseFromParent();
++NumSelects; // Statistic: number of selects optimized.
return true;
}
/// isLoadFoldable - Check whether MI is a candidate for folding into a later
/// instruction. We only fold loads to virtual registers and the virtual
/// register defined has a single use.
@ -477,11 +496,11 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
ImmDefMIs.clear();
FoldAsLoadDefReg = 0;
bool First = true;
MachineBasicBlock::iterator PMII;
for (MachineBasicBlock::iterator
MII = I->begin(), MIE = I->end(); MII != MIE; ) {
MachineInstr *MI = &*MII;
// We may be erasing MI below, increment MII now.
++MII;
LocalMIs.insert(MI);
// If there exists an instruction which belongs to the following
@ -490,28 +509,18 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
MI->hasUnmodeledSideEffects()) {
FoldAsLoadDefReg = 0;
++MII;
continue;
}
if (MI->mayStore() || MI->isCall())
FoldAsLoadDefReg = 0;
if (MI->isBitcast()) {
if (optimizeBitcastInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
Changed = true;
MII = First ? I->begin() : llvm::next(PMII);
continue;
}
} else if (MI->isCompare()) {
if (optimizeCmpInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
Changed = true;
MII = First ? I->begin() : llvm::next(PMII);
continue;
}
if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
(MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
(MI->isSelect() && optimizeSelect(MI))) {
// MI is deleted.
LocalMIs.erase(MI);
Changed = true;
continue;
}
if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
@ -542,14 +551,9 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// MI is replaced with FoldMI.
Changed = true;
PMII = FoldMI;
MII = llvm::next(PMII);
continue;
}
}
First = false;
PMII = MII;
++MII;
}
}

View File

@ -2303,7 +2303,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOpcode() == ISD::AND)
if (ConstantSDNode *AndRHS =
dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
EVT ShiftTy = DCI.isBeforeLegalize() ?
EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
@ -2333,7 +2333,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
const APInt &AndRHSC = AndRHS->getAPIntValue();
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
EVT ShiftTy = DCI.isBeforeLegalize() ?
EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
@ -2361,7 +2361,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
NewC = NewC.lshr(ShiftBits);
if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) {
EVT ShiftTy = DCI.isBeforeLegalize() ?
EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,

View File

@ -461,6 +461,9 @@ namespace {
/// allocateCodeSection - Allocate memory for a code section.
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID) {
// Grow the required block size to account for the block header
Size += sizeof(*CurBlock);
      // FIXME: Alignment handling.
FreeRangeHeader* candidateBlock = FreeMemoryList;
FreeRangeHeader* head = FreeMemoryList;

View File

@ -1770,23 +1770,41 @@ APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
opStatus fs;
assertArithmeticOK(*semantics);
// If the exponent is large enough, we know that this value is already
// integral, and the arithmetic below would potentially cause it to saturate
// to +/-Inf. Bail out early instead.
if (exponent+1 >= (int)semanticsPrecision(*semantics))
return opOK;
// The algorithm here is quite simple: we add 2^(p-1), where p is the
// precision of our format, and then subtract it back off again. The choice
// of rounding modes for the addition/subtraction determines the rounding mode
// for our integral rounding as well.
APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)),
1 << (semanticsPrecision(*semantics)-1));
// NOTE: When the input value is negative, we do subtraction followed by
// addition instead.
APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
IntegerConstant <<= semanticsPrecision(*semantics)-1;
APFloat MagicConstant(*semantics);
fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
rmNearestTiesToEven);
MagicConstant.copySign(*this);
if (fs != opOK)
return fs;
// Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
bool inputSign = isNegative();
fs = add(MagicConstant, rounding_mode);
if (fs != opOK && fs != opInexact)
return fs;
fs = subtract(MagicConstant, rounding_mode);
// Restore the input sign.
if (inputSign != isNegative())
changeSign();
return fs;
}

View File

@ -465,6 +465,118 @@ rety_open_create:
return error_code::success();
}
error_code mapped_file_region::init(int fd, uint64_t offset) {
AutoFD FD(fd);
// Figure out how large the file is.
struct stat FileInfo;
if (fstat(fd, &FileInfo) == -1)
return error_code(errno, system_category());
uint64_t FileSize = FileInfo.st_size;
if (Size == 0)
Size = FileSize;
else if (FileSize < Size) {
// We need to grow the file.
if (ftruncate(fd, Size) == -1)
return error_code(errno, system_category());
}
int flags = (Mode == readwrite) ? MAP_SHARED : MAP_PRIVATE;
int prot = (Mode == readonly) ? PROT_READ : (PROT_READ | PROT_WRITE);
#ifdef MAP_FILE
flags |= MAP_FILE;
#endif
Mapping = ::mmap(0, Size, prot, flags, fd, offset);
if (Mapping == MAP_FAILED)
return error_code(errno, system_category());
return error_code::success();
}
mapped_file_region::mapped_file_region(const Twine &path,
mapmode mode,
uint64_t length,
uint64_t offset,
error_code &ec)
: Mode(mode)
, Size(length)
, Mapping() {
// Make sure that the requested size fits within SIZE_T.
if (length > std::numeric_limits<size_t>::max()) {
ec = make_error_code(errc::invalid_argument);
return;
}
SmallString<128> path_storage;
StringRef name = path.toNullTerminatedStringRef(path_storage);
int oflags = (mode == readonly) ? O_RDONLY : O_RDWR;
int ofd = ::open(name.begin(), oflags);
if (ofd == -1) {
ec = error_code(errno, system_category());
return;
}
ec = init(ofd, offset);
if (ec)
Mapping = 0;
}
mapped_file_region::mapped_file_region(int fd,
mapmode mode,
uint64_t length,
uint64_t offset,
error_code &ec)
: Mode(mode)
, Size(length)
, Mapping() {
// Make sure that the requested size fits within SIZE_T.
if (length > std::numeric_limits<size_t>::max()) {
ec = make_error_code(errc::invalid_argument);
return;
}
ec = init(fd, offset);
if (ec)
Mapping = 0;
}
mapped_file_region::~mapped_file_region() {
if (Mapping)
::munmap(Mapping, Size);
}
#if LLVM_USE_RVALUE_REFERENCES
mapped_file_region::mapped_file_region(mapped_file_region &&other)
: Mode(other.Mode), Size(other.Size), Mapping(other.Mapping) {
other.Mapping = 0;
}
#endif
mapped_file_region::mapmode mapped_file_region::flags() const {
assert(Mapping && "Mapping failed but used anyway!");
return Mode;
}
uint64_t mapped_file_region::size() const {
assert(Mapping && "Mapping failed but used anyway!");
return Size;
}
char *mapped_file_region::data() const {
assert(Mapping && "Mapping failed but used anyway!");
assert(Mode != readonly && "Cannot get non const data for readonly mapping!");
return reinterpret_cast<char*>(Mapping);
}
const char *mapped_file_region::const_data() const {
assert(Mapping && "Mapping failed but used anyway!");
return reinterpret_cast<const char*>(Mapping);
}
int mapped_file_region::alignment() {
return Process::GetPageSize();
}
error_code detail::directory_iterator_construct(detail::DirIterState &it,
StringRef path){
SmallString<128> path_null(path);

View File

@ -22,6 +22,8 @@
#include <sys/stat.h>
#include <sys/types.h>
#undef max
// MinGW doesn't define this.
#ifndef _ERRNO_T_DEFINED
#define _ERRNO_T_DEFINED
@ -703,6 +705,203 @@ error_code get_magic(const Twine &path, uint32_t len,
return error_code::success();
}
error_code mapped_file_region::init(int FD, uint64_t Offset) {
FileDescriptor = FD;
// Make sure that the requested size fits within SIZE_T.
if (Size > std::numeric_limits<SIZE_T>::max()) {
if (FileDescriptor)
_close(FileDescriptor);
else
::CloseHandle(FileHandle);
return make_error_code(errc::invalid_argument);
}
DWORD flprotect;
switch (Mode) {
case readonly: flprotect = PAGE_READONLY; break;
case readwrite: flprotect = PAGE_READWRITE; break;
case priv: flprotect = PAGE_WRITECOPY; break;
default: llvm_unreachable("invalid mapping mode");
}
FileMappingHandle = ::CreateFileMapping(FileHandle,
0,
flprotect,
Size >> 32,
Size & 0xffffffff,
0);
if (FileMappingHandle == NULL) {
error_code ec = windows_error(GetLastError());
if (FileDescriptor)
_close(FileDescriptor);
else
::CloseHandle(FileHandle);
return ec;
}
DWORD dwDesiredAccess;
switch (Mode) {
case readonly: dwDesiredAccess = FILE_MAP_READ; break;
case readwrite: dwDesiredAccess = FILE_MAP_WRITE; break;
case priv: dwDesiredAccess = FILE_MAP_COPY; break;
default: llvm_unreachable("invalid mapping mode");
}
Mapping = ::MapViewOfFile(FileMappingHandle,
dwDesiredAccess,
Offset >> 32,
Offset & 0xffffffff,
Size);
if (Mapping == NULL) {
error_code ec = windows_error(GetLastError());
::CloseHandle(FileMappingHandle);
if (FileDescriptor)
_close(FileDescriptor);
else
::CloseHandle(FileHandle);
return ec;
}
if (Size == 0) {
MEMORY_BASIC_INFORMATION mbi;
SIZE_T Result = VirtualQuery(Mapping, &mbi, sizeof(mbi));
if (Result == 0) {
error_code ec = windows_error(GetLastError());
::UnmapViewOfFile(Mapping);
::CloseHandle(FileMappingHandle);
if (FileDescriptor)
_close(FileDescriptor);
else
::CloseHandle(FileHandle);
return ec;
}
Size = mbi.RegionSize;
}
return error_code::success();
}
mapped_file_region::mapped_file_region(const Twine &path,
mapmode mode,
uint64_t length,
uint64_t offset,
error_code &ec)
: Mode(mode)
, Size(length)
, Mapping()
, FileDescriptor()
, FileHandle(INVALID_HANDLE_VALUE)
, FileMappingHandle() {
SmallString<128> path_storage;
SmallVector<wchar_t, 128> path_utf16;
// Convert path to UTF-16.
if (ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))
return;
// Get file handle for creating a file mapping.
FileHandle = ::CreateFileW(c_str(path_utf16),
Mode == readonly ? GENERIC_READ
: GENERIC_READ | GENERIC_WRITE,
Mode == readonly ? FILE_SHARE_READ
: 0,
0,
Mode == readonly ? OPEN_EXISTING
: OPEN_ALWAYS,
Mode == readonly ? FILE_ATTRIBUTE_READONLY
: FILE_ATTRIBUTE_NORMAL,
0);
if (FileHandle == INVALID_HANDLE_VALUE) {
ec = windows_error(::GetLastError());
return;
}
FileDescriptor = 0;
ec = init(FileDescriptor, offset);
if (ec) {
Mapping = FileMappingHandle = 0;
FileHandle = INVALID_HANDLE_VALUE;
FileDescriptor = 0;
}
}
mapped_file_region::mapped_file_region(int fd,
mapmode mode,
uint64_t length,
uint64_t offset,
error_code &ec)
: Mode(mode)
, Size(length)
, Mapping()
, FileDescriptor(fd)
, FileHandle(INVALID_HANDLE_VALUE)
, FileMappingHandle() {
FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
if (FileHandle == INVALID_HANDLE_VALUE) {
_close(FileDescriptor);
FileDescriptor = 0;
ec = make_error_code(errc::bad_file_descriptor);
return;
}
ec = init(FileDescriptor, offset);
if (ec) {
Mapping = FileMappingHandle = 0;
FileHandle = INVALID_HANDLE_VALUE;
FileDescriptor = 0;
}
}
mapped_file_region::~mapped_file_region() {
if (Mapping)
::UnmapViewOfFile(Mapping);
if (FileMappingHandle)
::CloseHandle(FileMappingHandle);
if (FileDescriptor)
_close(FileDescriptor);
else if (FileHandle != INVALID_HANDLE_VALUE)
::CloseHandle(FileHandle);
}
#if LLVM_USE_RVALUE_REFERENCES
mapped_file_region::mapped_file_region(mapped_file_region &&other)
: Mode(other.Mode)
, Size(other.Size)
, Mapping(other.Mapping)
, FileDescriptor(other.FileDescriptor)
, FileHandle(other.FileHandle)
, FileMappingHandle(other.FileMappingHandle) {
other.Mapping = other.FileMappingHandle = 0;
other.FileHandle = INVALID_HANDLE_VALUE;
other.FileDescriptor = 0;
}
#endif
mapped_file_region::mapmode mapped_file_region::flags() const {
assert(Mapping && "Mapping failed but used anyway!");
return Mode;
}
uint64_t mapped_file_region::size() const {
assert(Mapping && "Mapping failed but used anyway!");
return Size;
}
char *mapped_file_region::data() const {
assert(Mode != readonly && "Cannot get non const data for readonly mapping!");
assert(Mapping && "Mapping failed but used anyway!");
return reinterpret_cast<char*>(Mapping);
}
const char *mapped_file_region::const_data() const {
assert(Mapping && "Mapping failed but used anyway!");
return reinterpret_cast<const char*>(Mapping);
}
int mapped_file_region::alignment() {
SYSTEM_INFO SysInfo;
::GetSystemInfo(&SysInfo);
return SysInfo.dwAllocationGranularity;
}
error_code detail::directory_iterator_construct(detail::DirIterState &it,
StringRef path){
SmallVector<wchar_t, 128> path_utf16;

View File

@ -532,7 +532,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// This modifier is not yet supported.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
return true;
case 'H': // The highest-numbered register of a pair.
case 'H': { // The highest-numbered register of a pair.
const MachineOperand &MO = MI->getOperand(OpNum);
if (!MO.isReg())
return true;
@ -547,6 +547,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
O << ARMInstPrinter::getRegisterName(Reg);
return false;
}
}
}
printOperand(MI, OpNum, O);

View File

@ -1568,6 +1568,136 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
}
/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the corresponding opcode for the predicated pseudo-instruction.
static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI,
const MachineRegisterInfo &MRI) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return 0;
if (!MRI.hasOneNonDBGUse(Reg))
return 0;
MI = MRI.getVRegDef(Reg);
if (!MI)
return 0;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading CPSR.
for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
continue;
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
return 0;
if (MO.isDef() && !MO.isDead())
return 0;
}
switch (MI->getOpcode()) {
default: return 0;
case ARM::ANDri: return ARM::ANDCCri;
case ARM::ANDrr: return ARM::ANDCCrr;
case ARM::ANDrsi: return ARM::ANDCCrsi;
case ARM::ANDrsr: return ARM::ANDCCrsr;
case ARM::t2ANDri: return ARM::t2ANDCCri;
case ARM::t2ANDrr: return ARM::t2ANDCCrr;
case ARM::t2ANDrs: return ARM::t2ANDCCrs;
case ARM::EORri: return ARM::EORCCri;
case ARM::EORrr: return ARM::EORCCrr;
case ARM::EORrsi: return ARM::EORCCrsi;
case ARM::EORrsr: return ARM::EORCCrsr;
case ARM::t2EORri: return ARM::t2EORCCri;
case ARM::t2EORrr: return ARM::t2EORCCrr;
case ARM::t2EORrs: return ARM::t2EORCCrs;
case ARM::ORRri: return ARM::ORRCCri;
case ARM::ORRrr: return ARM::ORRCCrr;
case ARM::ORRrsi: return ARM::ORRCCrsi;
case ARM::ORRrsr: return ARM::ORRCCrsr;
case ARM::t2ORRri: return ARM::t2ORRCCri;
case ARM::t2ORRrr: return ARM::t2ORRCCrr;
case ARM::t2ORRrs: return ARM::t2ORRCCrs;
// ARM ADD/SUB
case ARM::ADDri: return ARM::ADDCCri;
case ARM::ADDrr: return ARM::ADDCCrr;
case ARM::ADDrsi: return ARM::ADDCCrsi;
case ARM::ADDrsr: return ARM::ADDCCrsr;
case ARM::SUBri: return ARM::SUBCCri;
case ARM::SUBrr: return ARM::SUBCCrr;
case ARM::SUBrsi: return ARM::SUBCCrsi;
case ARM::SUBrsr: return ARM::SUBCCrsr;
// Thumb2 ADD/SUB
case ARM::t2ADDri: return ARM::t2ADDCCri;
case ARM::t2ADDri12: return ARM::t2ADDCCri12;
case ARM::t2ADDrr: return ARM::t2ADDCCrr;
case ARM::t2ADDrs: return ARM::t2ADDCCrs;
case ARM::t2SUBri: return ARM::t2SUBCCri;
case ARM::t2SUBri12: return ARM::t2SUBCCri12;
case ARM::t2SUBrr: return ARM::t2SUBCCrr;
case ARM::t2SUBrs: return ARM::t2SUBCCrs;
}
}
bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
SmallVectorImpl<MachineOperand> &Cond,
unsigned &TrueOp, unsigned &FalseOp,
bool &Optimizable) const {
assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
// MOVCC operands:
// 0: Def.
// 1: True use.
// 2: False use.
// 3: Condition code.
// 4: CPSR use.
TrueOp = 1;
FalseOp = 2;
Cond.push_back(MI->getOperand(3));
Cond.push_back(MI->getOperand(4));
// We can always fold a def.
Optimizable = true;
return false;
}
MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
bool PreferFalse) const {
assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MachineInstr *DefMI = 0;
unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI);
bool Invert = !Opc;
if (!Opc)
Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI);
if (!Opc)
return 0;
// Create a new predicated version of DefMI.
// Rfalse is the first use.
MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
get(Opc), MI->getOperand(0).getReg())
.addOperand(MI->getOperand(Invert ? 2 : 1));
// Copy all the DefMI operands, excluding its (null) predicate.
const MCInstrDesc &DefDesc = DefMI->getDesc();
for (unsigned i = 1, e = DefDesc.getNumOperands();
i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
NewMI.addOperand(DefMI->getOperand(i));
unsigned CondCode = MI->getOperand(3).getImm();
if (Invert)
NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
else
NewMI.addImm(CondCode);
NewMI.addOperand(MI->getOperand(4));
// DefMI is not the -S version that sets CPSR, so add an optional %noreg.
if (NewMI->hasOptionalDef())
AddDefaultCC(NewMI);
// The caller will erase MI, but not DefMI.
DefMI->eraseFromParent();
return NewMI;
}
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
/// def operand.
@ -3224,11 +3354,18 @@ enum ARMExeDomain {
//
std::pair<uint16_t, uint16_t>
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
// VMOVD is a VFP instruction, but can be changed to NEON if it isn't
// predicated.
// VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
// if they are not predicated.
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
// Cortex-A9 is particularly picky about mixing the two and wants these
// converted.
if (Subtarget.isCortexA9() && !isPredicated(MI) &&
(MI->getOpcode() == ARM::VMOVRS ||
MI->getOpcode() == ARM::VMOVSR))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
// No other instructions can be swizzled, so just determine their domain.
unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
@ -3248,22 +3385,97 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// We only know how to change VMOVD into VORR.
assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD");
if (Domain != ExeNEON)
return;
unsigned DstReg, SrcReg, DReg;
unsigned Lane;
MachineInstrBuilder MIB(MI);
const TargetRegisterInfo *TRI = &getRegisterInfo();
bool isKill;
switch (MI->getOpcode()) {
default:
llvm_unreachable("cannot handle opcode!");
break;
case ARM::VMOVD:
if (Domain != ExeNEON)
break;
// Zap the predicate operands.
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
MI->RemoveOperand(3);
MI->RemoveOperand(2);
// Zap the predicate operands.
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
MI->RemoveOperand(3);
MI->RemoveOperand(2);
// Change to a VORRd which requires two identical use operands.
MI->setDesc(get(ARM::VORRd));
// Change to a VORRd which requires two identical use operands.
MI->setDesc(get(ARM::VORRd));
// Add the extra source operand and new predicates.
// This will go before any implicit ops.
AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
break;
case ARM::VMOVRS:
if (Domain != ExeNEON)
break;
assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass);
Lane = 0;
if (DReg == ARM::NoRegister) {
DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass);
Lane = 1;
assert(DReg && "S-register with no D super-register?");
}
MI->RemoveOperand(3);
MI->RemoveOperand(2);
MI->RemoveOperand(1);
MI->setDesc(get(ARM::VGETLNi32));
MIB.addReg(DReg);
MIB.addImm(Lane);
MIB->getOperand(1).setIsUndef();
MIB.addReg(SrcReg, RegState::Implicit);
AddDefaultPred(MIB);
break;
case ARM::VMOVSR:
if (Domain != ExeNEON)
break;
assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass);
Lane = 0;
if (DReg == ARM::NoRegister) {
DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass);
Lane = 1;
assert(DReg && "S-register with no D super-register?");
}
isKill = MI->getOperand(0).isKill();
MI->RemoveOperand(3);
MI->RemoveOperand(2);
MI->RemoveOperand(1);
MI->RemoveOperand(0);
MI->setDesc(get(ARM::VSETLNi32));
MIB.addReg(DReg);
MIB.addReg(DReg);
MIB.addReg(SrcReg);
MIB.addImm(Lane);
MIB->getOperand(1).setIsUndef();
if (isKill)
MIB->addRegisterKilled(DstReg, TRI, true);
MIB->addRegisterDefined(DstReg, TRI);
AddDefaultPred(MIB);
break;
}
// Add the extra source operand and new predicates.
// This will go before any implicit ops.
AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
}
bool ARMBaseInstrInfo::hasNOP() const {

View File

@ -202,6 +202,13 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const;
virtual bool analyzeSelect(const MachineInstr *MI,
SmallVectorImpl<MachineOperand> &Cond,
unsigned &TrueOp, unsigned &FalseOp,
bool &Optimizable) const;
virtual MachineInstr *optimizeSelect(MachineInstr *MI, bool) const;
/// FoldImmediate - 'Reg' is known to be defined by a move immediate
/// instruction, try to fold the immediate into the use instruction.
virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
@ -352,6 +359,11 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
int getMatchingCondBranchOpcode(int Opc);
/// Determine if MI can be folded into an ARM MOVCC instruction, and return the
/// opcode of the SSA instruction representing the conditional MI.
unsigned canFoldARMInstrIntoMOVCC(unsigned Reg,
MachineInstr *&MI,
const MachineRegisterInfo &MRI);
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
/// the instruction is encoded with an 'S' bit is determined by the optional

View File

@ -1821,9 +1821,12 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
default:
llvm_unreachable("Unsupported calling convention");
case CallingConv::Fast:
// Ignore fastcc. Silence compiler warnings.
(void)RetFastCC_ARM_APCS;
(void)FastCC_ARM_APCS;
if (Subtarget->hasVFP2() && !isVarArg) {
if (!Subtarget->isAAPCS_ABI())
return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
// For AAPCS ABI targets, just use VFP variant of the calling convention.
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
}
// Fallthrough
case CallingConv::C:
// Use target triple & subtarget features to do actual dispatch.

View File

@ -2385,8 +2385,10 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
case ARMISD::COR: Opc = ARM::t2ORRCCrs; break;
case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break;
}
SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
SDValue Ops[] = {
FalseVal, FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag
};
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
}
ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
@ -2401,8 +2403,8 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
case ARMISD::CXOR: Opc = ARM::t2EORCCri; break;
}
SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
SDValue Ops[] = { FalseVal, FalseVal, True, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
}
}
@ -2413,8 +2415,8 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
case ARMISD::COR: Opc = ARM::t2ORRCCrr; break;
case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break;
}
SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
SDValue Ops[] = { FalseVal, FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
}
SDValue CPTmp0;
@ -2428,8 +2430,10 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
case ARMISD::COR: Opc = ARM::ORRCCrsi; break;
case ARMISD::CXOR: Opc = ARM::EORCCrsi; break;
}
SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
SDValue Ops[] = {
FalseVal, FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag
};
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
}
if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
@ -2440,8 +2444,10 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
case ARMISD::COR: Opc = ARM::ORRCCrsr; break;
case ARMISD::CXOR: Opc = ARM::EORCCrsr; break;
}
SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
SDValue Ops[] = {
FalseVal, FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag
};
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 9);
}
ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
@ -2456,8 +2462,8 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
case ARMISD::CXOR: Opc = ARM::EORCCri; break;
}
SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
SDValue Ops[] = { FalseVal, FalseVal, True, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
}
}
@ -2468,8 +2474,8 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
case ARMISD::COR: Opc = ARM::ORRCCrr; break;
case ARMISD::CXOR: Opc = ARM::EORCCrr; break;
}
SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
SDValue Ops[] = { FalseVal, FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
}
/// Target-specific DAG combining for ISD::XOR.

View File

@ -6973,6 +6973,27 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// ARM Optimization Hooks
//===----------------------------------------------------------------------===//
// Helper function that checks if N is a null or all ones constant.
static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
if (!C)
return false;
return AllOnes ? C->isAllOnesValue() : C->isNullValue();
}
// Combine a constant select operand into its use:
//
// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
//
// The transform is rejected if the select doesn't have a constant operand that
// is null.
//
// @param N The node to transform.
// @param Slct The N operand that is a select.
// @param OtherOp The other N operand (x above).
// @param DCI Context.
// @returns The new node, or SDValue() on failure.
static
SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
TargetLowering::DAGCombinerInfo &DCI) {
@ -6998,16 +7019,12 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
"Bad input!");
if (LHS.getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(LHS)->isNullValue()) {
if (isZeroOrAllOnes(LHS, false)) {
DoXform = true;
} else if (CC != ISD::SETCC_INVALID &&
RHS.getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(RHS)->isNullValue()) {
} else if (CC != ISD::SETCC_INVALID && isZeroOrAllOnes(RHS, false)) {
std::swap(LHS, RHS);
SDValue Op0 = Slct.getOperand(0);
EVT OpVT = isSlctCC ? Op0.getValueType() :
Op0.getOperand(0).getValueType();
EVT OpVT = isSlctCC ? Op0.getValueType() : Op0.getOperand(0).getValueType();
bool isInt = OpVT.isInteger();
CC = ISD::getSetCCInverse(CC, isInt);
@ -7018,19 +7035,19 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
InvCC = true;
}
if (DoXform) {
SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
if (isSlctCC)
return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
Slct.getOperand(0), Slct.getOperand(1), CC);
SDValue CCOp = Slct.getOperand(0);
if (InvCC)
CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
CCOp.getOperand(0), CCOp.getOperand(1), CC);
return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
CCOp, OtherOp, Result);
}
return SDValue();
if (!DoXform)
return SDValue();
SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
if (isSlctCC)
return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
Slct.getOperand(0), Slct.getOperand(1), CC);
SDValue CCOp = Slct.getOperand(0);
if (InvCC)
CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
CCOp.getOperand(0), CCOp.getOperand(1), CC);
return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
CCOp, OtherOp, Result);
}
// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
@ -7297,16 +7314,8 @@ static SDValue PerformMULCombine(SDNode *N,
}
static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) {
if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse())
return false;
SDValue FalseVal = N.getOperand(0);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal);
if (!C)
return false;
if (AllOnes)
return C->isAllOnesValue();
return C->isNullValue();
return N.getOpcode() == ARMISD::CMOV && N.getNode()->hasOneUse() &&
isZeroOrAllOnes(N.getOperand(0), AllOnes);
}
/// formConditionalOp - Combine an operation with a conditional move operand
@ -8808,6 +8817,8 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
case MVT::i16:
case MVT::i32:
return true;
case MVT::f64:
return Subtarget->hasNEON();
// FIXME: VLD1 etc with standard alignment is legal.
}
}

View File

@ -242,6 +242,9 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
"Subtarget->isTargetDarwin()">;
def IsLE : Predicate<"TLI.isLittleEndian()">;
def IsBE : Predicate<"TLI.isBigEndian()">;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@ -3936,7 +3939,7 @@ def BCCZi64 : PseudoInst<(outs),
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
let isCommutable = 1 in
let isCommutable = 1, isSelect = 1 in
def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
4, IIC_iCMOVr,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
@ -3989,25 +3992,29 @@ multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi,
InstrItinClass iii, InstrItinClass iir,
InstrItinClass iis> {
def ri : ARMPseudoExpand<(outs GPR:$Rd),
(ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s),
(ins GPR:$Rfalse, GPR:$Rn, so_imm:$imm,
pred:$p, cc_out:$s),
4, iii, [],
(iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>,
RegConstraint<"$Rn = $Rd">;
RegConstraint<"$Rfalse = $Rd">;
def rr : ARMPseudoExpand<(outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
(ins GPR:$Rfalse, GPR:$Rn, GPR:$Rm,
pred:$p, cc_out:$s),
4, iir, [],
(irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
RegConstraint<"$Rn = $Rd">;
RegConstraint<"$Rfalse = $Rd">;
def rsi : ARMPseudoExpand<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s),
(ins GPR:$Rfalse, GPR:$Rn, so_reg_imm:$shift,
pred:$p, cc_out:$s),
4, iis, [],
(irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>,
RegConstraint<"$Rn = $Rd">;
RegConstraint<"$Rfalse = $Rd">;
def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s),
(ins GPRnopc:$Rfalse, GPRnopc:$Rn, so_reg_reg:$shift,
pred:$p, cc_out:$s),
4, iis, [],
(irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>,
RegConstraint<"$Rn = $Rd">;
RegConstraint<"$Rfalse = $Rd">;
}
defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr,
@ -4016,6 +4023,10 @@ defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr,
IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr,
IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
defm ADDCC : AsI1_bincc_irs<ADDri, ADDrr, ADDrsi, ADDrsr,
IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
defm SUBCC : AsI1_bincc_irs<SUBri, SUBrr, SUBrsi, SUBrsr,
IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
} // neverHasSideEffects

View File

@ -398,6 +398,27 @@ def VecListFourQWordIndexed : Operand<i32> {
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
@ -2238,6 +2259,19 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
// Use vld1/vst1 for unaligned f64 load / store
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
(VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
(VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
(VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
(VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
(VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
(VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
//===----------------------------------------------------------------------===//
// NEON pattern fragments

View File

@ -757,6 +757,33 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{24} = 1;
let Inst{23-21} = op23_21;
}
// Predicated versions.
def CCri : t2PseudoExpand<(outs GPRnopc:$Rd),
(ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_imm:$imm,
pred:$p, cc_out:$s), 4, IIC_iALUi, [],
(!cast<Instruction>(NAME#ri) GPRnopc:$Rd,
GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
RegConstraint<"$Rfalse = $Rd">;
def CCri12 : t2PseudoExpand<(outs GPRnopc:$Rd),
(ins GPRnopc:$Rfalse, GPR:$Rn, imm0_4095:$imm,
pred:$p),
4, IIC_iALUi, [],
(!cast<Instruction>(NAME#ri12) GPRnopc:$Rd,
GPR:$Rn, imm0_4095:$imm, pred:$p)>,
RegConstraint<"$Rfalse = $Rd">;
def CCrr : t2PseudoExpand<(outs GPRnopc:$Rd),
(ins GPRnopc:$Rfalse, GPRnopc:$Rn, rGPR:$Rm,
pred:$p, cc_out:$s), 4, IIC_iALUr, [],
(!cast<Instruction>(NAME#rr) GPRnopc:$Rd,
GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
RegConstraint<"$Rfalse = $Rd">;
def CCrs : t2PseudoExpand<(outs GPRnopc:$Rd),
(ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_reg:$Rm,
pred:$p, cc_out:$s), 4, IIC_iALUsi, [],
(!cast<Instruction>(NAME#rs) GPRnopc:$Rd,
GPRnopc:$Rn, t2_so_reg:$Rm, pred:$p, cc_out:$s)>,
RegConstraint<"$Rfalse = $Rd">;
}
/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
@ -2938,7 +2965,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
let isCommutable = 1 in
let isCommutable = 1, isSelect = 1 in
def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$false, rGPR:$Rm, pred:$p),
4, IIC_iCMOVr,
@ -3026,22 +3053,25 @@ multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
// shifted imm
def ri : t2PseudoExpand<(outs rGPR:$Rd),
(ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s),
(ins rGPR:$Rfalse, rGPR:$Rn, t2_so_imm:$imm,
pred:$p, cc_out:$s),
4, iii, [],
(iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
RegConstraint<"$Rn = $Rd">;
RegConstraint<"$Rfalse = $Rd">;
// register
def rr : t2PseudoExpand<(outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s),
(ins rGPR:$Rfalse, rGPR:$Rn, rGPR:$Rm,
pred:$p, cc_out:$s),
4, iir, [],
(irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
RegConstraint<"$Rn = $Rd">;
RegConstraint<"$Rfalse = $Rd">;
// shifted register
def rs : t2PseudoExpand<(outs rGPR:$Rd),
(ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s),
(ins rGPR:$Rfalse, rGPR:$Rn, t2_so_reg:$ShiftedRm,
pred:$p, cc_out:$s),
4, iis, [],
(irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>,
RegConstraint<"$Rn = $Rd">;
RegConstraint<"$Rfalse = $Rd">;
} // T2I_bincc_irs
defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs,

View File

@ -61,6 +61,15 @@ def vfp_f64imm : Operand<f64>,
let ParserMatchClass = FPImmOperand;
}
def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 4;
}]>;
def alignedstore32 : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() >= 4;
}]>;
// The VCVT to/from fixed-point instructions encode the 'fbits' operand
// (the number of fixed bits) differently than it appears in the assembly
// source. It's encoded as "Size - fbits" where Size is the size of the
@ -86,7 +95,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
IIC_fpLoad64, "vldr", "\t$Dd, $addr",
[(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
[(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
IIC_fpLoad32, "vldr", "\t$Sd, $addr",
@ -100,7 +109,7 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
IIC_fpStore64, "vstr", "\t$Dd, $addr",
[(store (f64 DPR:$Dd), addrmode5:$addr)]>;
[(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
IIC_fpStore32, "vstr", "\t$Sd, $addr",
@ -433,25 +442,25 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
// Between half-precision and single-precision. For disassembly only.
// FIXME: Verify encoding after integrated assembler is working.
def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f32_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f16_to_f32 GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;

File diff suppressed because it is too large Load Diff

View File

@ -16,6 +16,7 @@
#include "MipsRegisterInfo.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/MC/MCSubtargetInfo.h"
@ -274,7 +275,8 @@ MipsDisassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
// Calling the auto-generated decoder function.
Result = decodeMipsInstruction32(instr, Insn, Address, this, STI);
Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
@ -298,13 +300,15 @@ Mips64Disassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
// Calling the auto-generated decoder function.
Result = decodeMips64Instruction32(instr, Insn, Address, this, STI);
Result = decodeInstruction(DecoderTableMips6432, instr, Insn, Address,
this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
}
// If we fail to decode in Mips64 decoder space we can try in Mips32
Result = decodeMipsInstruction32(instr, Insn, Address, this, STI);
Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
@ -379,8 +383,8 @@ static DecodeStatus DecodeMem(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
unsigned Reg = fieldFromInstruction32(Insn, 16, 5);
unsigned Base = fieldFromInstruction32(Insn, 21, 5);
unsigned Reg = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
Reg = getReg(Decoder, Mips::CPURegsRegClassID, Reg);
Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
@ -401,8 +405,8 @@ static DecodeStatus DecodeFMem(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
unsigned Reg = fieldFromInstruction32(Insn, 16, 5);
unsigned Base = fieldFromInstruction32(Insn, 21, 5);
unsigned Reg = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg);
Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
@ -484,7 +488,7 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
unsigned JumpOffset = fieldFromInstruction32(Insn, 0, 26) << 2;
unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2;
Inst.addOperand(MCOperand::CreateImm(JumpOffset));
return MCDisassembler::Success;
}

View File

@ -44,6 +44,8 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
"Enable n64 ABI">;
def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
"Enable eabi ABI">;
def FeatureAndroid : SubtargetFeature<"android", "IsAndroid", "true",
"Target is android">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
"true", "Enable vector FPU instructions.">;
def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",

View File

@ -144,6 +144,17 @@ def RetCC_MipsEABI : CallingConv<[
CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
]>;
//===----------------------------------------------------------------------===//
// Mips Android Calling Convention
//===----------------------------------------------------------------------===//
def RetCC_MipsAndroid : CallingConv<[
// f32 are returned in registers F0, F2, F1, F3
CCIfType<[f32], CCAssignToReg<[F0, F2, F1, F3]>>,
CCDelegateTo<RetCC_MipsO32>
]>;
//===----------------------------------------------------------------------===//
// Mips FastCC Calling Convention
//===----------------------------------------------------------------------===//
@ -210,6 +221,7 @@ def RetCC_Mips : CallingConv<[
CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>,
CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>,
CCIfSubtarget<"isAndroid()", CCDelegateTo<RetCC_MipsAndroid>>,
CCDelegateTo<RetCC_MipsO32>
]>;

View File

@ -89,6 +89,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// InMips16 -- can process Mips16 instructions
bool InMips16Mode;
// IsAndroid -- target is android
bool IsAndroid;
InstrItineraryData InstrItins;
public:
@ -128,6 +131,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
bool isNotSingleFloat() const { return !IsSingleFloat; }
bool hasVFPU() const { return HasVFPU; }
bool inMips16Mode() const { return InMips16Mode; }
bool isAndroid() const { return IsAndroid; }
bool isLinux() const { return IsLinux; }
bool hasStandardEncoding() const { return !inMips16Mode(); }

View File

@ -106,7 +106,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
// We do not currently implment this libm ops for PowerPC.
// We do not currently implement these libm ops for PowerPC.
setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);

View File

@ -17,14 +17,14 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// X86 Subtarget state.
// X86 Subtarget state
//
def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
"64-bit mode (x86_64)">;
//===----------------------------------------------------------------------===//
// X86 Subtarget features.
// X86 Subtarget features
//===----------------------------------------------------------------------===//
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
@ -97,7 +97,7 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
[FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
"Enable XOP instructions",
[FeatureAVX, FeatureSSE4A]>;
[FeatureFMA4]>;
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"HasVectorUAMem", "true",
"Allow unaligned memory operands on vector/SIMD instructions">;
@ -226,7 +226,7 @@ def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePCLMUL,
FeatureF16C, FeatureLZCNT,
FeaturePOPCNT, FeatureBMI]>;
FeaturePOPCNT, FeatureBMI, FeatureFMA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [Feature3DNow]>;

View File

@ -5114,6 +5114,82 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
return SDValue();
}
// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
// constraint of matching input/output vector elements.
SDValue
X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
SDNode *N = Op.getNode();
EVT VT = Op.getValueType();
unsigned NumElts = Op.getNumOperands();
// Check supported types and sub-targets.
//
// Only v2f32 -> v2f64 needs special handling.
if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
return SDValue();
SDValue VecIn;
EVT VecInVT;
SmallVector<int, 8> Mask;
EVT SrcVT = MVT::Other;
// Check the patterns could be translated into X86vfpext.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue In = N->getOperand(i);
unsigned Opcode = In.getOpcode();
// Skip if the element is undefined.
if (Opcode == ISD::UNDEF) {
Mask.push_back(-1);
continue;
}
// Quit if one of the elements is not defined from 'fpext'.
if (Opcode != ISD::FP_EXTEND)
return SDValue();
// Check how the source of 'fpext' is defined.
SDValue L2In = In.getOperand(0);
EVT L2InVT = L2In.getValueType();
// Check the original type
if (SrcVT == MVT::Other)
SrcVT = L2InVT;
else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
return SDValue();
// Check whether the value being 'fpext'ed is extracted from the same
// source.
Opcode = L2In.getOpcode();
// Quit if it's not extracted with a constant index.
if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(L2In.getOperand(1)))
return SDValue();
SDValue ExtractedFromVec = L2In.getOperand(0);
if (VecIn.getNode() == 0) {
VecIn = ExtractedFromVec;
VecInVT = ExtractedFromVec.getValueType();
} else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
return SDValue();
Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
}
// Fill the remaining mask as undef.
for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
Mask.push_back(-1);
return DAG.getNode(X86ISD::VFPEXT, DL, VT,
DAG.getVectorShuffle(VecInVT, DL,
VecIn, DAG.getUNDEF(VecInVT),
&Mask[0]));
}
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
@ -5146,6 +5222,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (Broadcast.getNode())
return Broadcast;
SDValue FpExt = LowerVectorFpExtend(Op, DAG);
if (FpExt.getNode())
return FpExt;
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;
@ -11122,9 +11202,9 @@ static void ReplaceATOMIC_LOAD(SDNode *Node,
Results.push_back(Swap.getValue(1));
}
void X86TargetLowering::
static void
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG, unsigned NewOp) const {
SelectionDAG &DAG, unsigned NewOp) {
DebugLoc dl = Node->getDebugLoc();
assert (Node->getValueType(0) == MVT::i64 &&
"Only know how to expand i64 atomics");
@ -11245,26 +11325,40 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::ATOMIC_LOAD_ADD:
ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
return;
case ISD::ATOMIC_LOAD_AND:
ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
return;
case ISD::ATOMIC_LOAD_NAND:
ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
return;
case ISD::ATOMIC_LOAD_OR:
ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
return;
case ISD::ATOMIC_LOAD_SUB:
ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
return;
case ISD::ATOMIC_LOAD_XOR:
ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
return;
case ISD::ATOMIC_SWAP:
ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
case ISD::ATOMIC_SWAP: {
unsigned Opc;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode");
case ISD::ATOMIC_LOAD_ADD:
Opc = X86ISD::ATOMADD64_DAG;
break;
case ISD::ATOMIC_LOAD_AND:
Opc = X86ISD::ATOMAND64_DAG;
break;
case ISD::ATOMIC_LOAD_NAND:
Opc = X86ISD::ATOMNAND64_DAG;
break;
case ISD::ATOMIC_LOAD_OR:
Opc = X86ISD::ATOMOR64_DAG;
break;
case ISD::ATOMIC_LOAD_SUB:
Opc = X86ISD::ATOMSUB64_DAG;
break;
case ISD::ATOMIC_LOAD_XOR:
Opc = X86ISD::ATOMXOR64_DAG;
break;
case ISD::ATOMIC_SWAP:
Opc = X86ISD::ATOMSWAP64_DAG;
break;
}
ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
return;
}
case ISD::ATOMIC_LOAD:
ReplaceATOMIC_LOAD(N, Results, DAG);
}
@ -11342,7 +11436,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
@ -12792,16 +12888,31 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// String/text processing lowering.
case X86::PCMPISTRM128REG:
case X86::VPCMPISTRM128REG:
return EmitPCMP(MI, BB, 3, false /* in-mem */);
case X86::PCMPISTRM128MEM:
case X86::VPCMPISTRM128MEM:
return EmitPCMP(MI, BB, 3, true /* in-mem */);
case X86::PCMPESTRM128REG:
case X86::VPCMPESTRM128REG:
return EmitPCMP(MI, BB, 5, false /* in mem */);
case X86::PCMPESTRM128MEM:
case X86::VPCMPESTRM128MEM:
return EmitPCMP(MI, BB, 5, true /* in mem */);
case X86::VPCMPESTRM128MEM: {
unsigned NumArgs;
bool MemArg;
switch (MI->getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::PCMPISTRM128REG:
case X86::VPCMPISTRM128REG:
NumArgs = 3; MemArg = false; break;
case X86::PCMPISTRM128MEM:
case X86::VPCMPISTRM128MEM:
NumArgs = 3; MemArg = true; break;
case X86::PCMPESTRM128REG:
case X86::VPCMPESTRM128REG:
NumArgs = 5; MemArg = false; break;
case X86::PCMPESTRM128MEM:
case X86::VPCMPESTRM128MEM:
NumArgs = 5; MemArg = true; break;
}
return EmitPCMP(MI, BB, NumArgs, MemArg);
}
// Thread synchronization.
case X86::MONITOR:

View File

@ -227,6 +227,9 @@ namespace llvm {
// VSEXT_MOVL - Vector move low and sign extend.
VSEXT_MOVL,
// VFPEXT - Vector FP extend.
VFPEXT,
// VSHL, VSRL - 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@ -828,6 +831,8 @@ namespace llvm {
SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@ -859,9 +864,6 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const;
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp) const;
/// Utility function to emit string processing sse4.2 instructions
/// that return in xmm0.
/// This takes the instruction to expand, the associated machine basic

View File

@ -81,6 +81,11 @@ def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>]>>;
def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;

View File

@ -2101,12 +2101,20 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
(VCVTPD2PSYrm addr:$src)>;
def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
(VCVTPS2PDrr VR128:$src)>;
def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
(VCVTPS2PDYrr VR128:$src)>;
def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
(VCVTPS2PDYrm addr:$src)>;
}
let Predicates = [HasSSE2] in {
// Match fextend for 128 conversions
def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
(CVTPS2PDrr VR128:$src)>;
}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//

View File

@ -51,8 +51,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// if the size is something we can handle with a single primitive load/store.
// A single load+store correctly handles overlapping memory in the memmove
// case.
unsigned Size = MemOpLength->getZExtValue();
if (Size == 0) return MI; // Delete this mem transfer.
uint64_t Size = MemOpLength->getLimitedValue();
assert(Size && "0-sized memory transfering should be removed already.");
if (Size > 8 || (Size&(Size-1)))
return 0; // If not 1/2/4/8 bytes, exit.
@ -133,11 +133,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
return 0;
uint64_t Len = LenC->getZExtValue();
uint64_t Len = LenC->getLimitedValue();
Alignment = MI->getAlignment();
// If the length is zero, this is a no-op
if (Len == 0) return MI; // memset(d,c,0,a) -> noop
assert(Len && "0-sized memory setting should be removed already.");
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {

View File

@ -86,6 +86,9 @@ static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics",
cl::desc("instrument atomic instructions (rmw, cmpxchg)"),
cl::Hidden, cl::init(true));
static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path",
cl::desc("use instrumentation with slow path for all accesses"),
cl::Hidden, cl::init(false));
// This flag limits the number of instructions to be instrumented
// in any given BB. Normally, this should be set to unlimited (INT_MAX),
// but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporary
@ -159,7 +162,7 @@ struct AddressSanitizer : public ModulePass {
Value *Addr, uint32_t TypeSize, bool IsWrite);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue, uint32_t TypeSize);
Instruction *generateCrashCode(BasicBlock *BB, Value *Addr, Value *PC,
Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex);
bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI);
void instrumentMemIntrinsicParam(AsanFunctionContext &AFC,
@ -251,24 +254,24 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
// ThenBlock
// Tail
//
// If ThenBlock is zero, a new block is created and its terminator is returned.
// Otherwize 0 is returned.
static BranchInst *splitBlockAndInsertIfThen(Value *Cmp,
BasicBlock *ThenBlock = 0) {
// ThenBlock block is created and its terminator is returned.
// If Unreachable, ThenBlock is terminated with UnreachableInst, otherwise
// it is terminated with BranchInst to Tail.
static TerminatorInst *splitBlockAndInsertIfThen(Value *Cmp, bool Unreachable) {
Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode();
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
TerminatorInst *HeadOldTerm = Head->getTerminator();
BranchInst *CheckTerm = 0;
if (!ThenBlock) {
LLVMContext &C = Head->getParent()->getParent()->getContext();
ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
LLVMContext &C = Head->getParent()->getParent()->getContext();
BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
TerminatorInst *CheckTerm;
if (Unreachable)
CheckTerm = new UnreachableInst(C, ThenBlock);
else
CheckTerm = BranchInst::Create(Tail, ThenBlock);
}
BranchInst *HeadNewTerm =
BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
return CheckTerm;
}
@ -320,7 +323,7 @@ bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
Value *Cmp = IRB.CreateICmpNE(Length,
Constant::getNullValue(Length->getType()));
InsertBefore = splitBlockAndInsertIfThen(Cmp);
InsertBefore = splitBlockAndInsertIfThen(Cmp, false);
}
instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true);
@ -391,15 +394,11 @@ Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) {
}
Instruction *AddressSanitizer::generateCrashCode(
BasicBlock *BB, Value *Addr, Value *PC,
Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex) {
IRBuilder<> IRB(BB->getFirstNonPHI());
CallInst *Call;
if (PC)
Call = IRB.CreateCall2(AsanErrorCallback[IsWrite][AccessSizeIndex],
Addr, PC);
else
Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr);
IRBuilder<> IRB(InsertBefore);
CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex],
Addr);
// We don't do Call->setDoesNotReturn() because the BB already has
// UnreachableInst at the end.
// This EmptyAsm is required to avoid callback merge.
@ -420,7 +419,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
// (uint8_t) ((Addr & (Granularity-1)) + size - 1)
LastAccessedByte = IRB.CreateIntCast(
LastAccessedByte, IRB.getInt8Ty(), false);
LastAccessedByte, ShadowValue->getType(), false);
// ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}
@ -440,26 +439,27 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
BasicBlock *CrashBlock = BasicBlock::Create(*C, "crash_bb", &AFC.F);
new UnreachableInst(*C, CrashBlock);
size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
Instruction *Crash =
generateCrashCode(CrashBlock, AddrLong, 0, IsWrite, AccessSizeIndex);
Crash->setDebugLoc(OrigIns->getDebugLoc());
size_t Granularity = 1 << MappingScale;
if (TypeSize < 8 * Granularity) {
BranchInst *CheckTerm = splitBlockAndInsertIfThen(Cmp);
assert(CheckTerm->isUnconditional());
TerminatorInst *CrashTerm = 0;
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
TerminatorInst *CheckTerm = splitBlockAndInsertIfThen(Cmp, false);
assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB);
CrashTerm = new UnreachableInst(*C, CrashBlock);
BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
ReplaceInstWithInst(CheckTerm, NewTerm);
} else {
splitBlockAndInsertIfThen(Cmp, CrashBlock);
CrashTerm = splitBlockAndInsertIfThen(Cmp, true);
}
Instruction *Crash =
generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex);
Crash->setDebugLoc(OrigIns->getDebugLoc());
}
// This function replaces all global variables with new variables that have

View File

@ -66,11 +66,6 @@ static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
// FIXME: Remove this abomination once all of the tests pass without it!
static cl::opt<bool> DisableDeleteDeadBlocks(
"disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false),
cl::desc("Disable deleting dead blocks in CodeGenPrepare"));
static cl::opt<bool> DisableSelectToBranch(
"disable-cgp-select2branch", cl::Hidden, cl::init(false),
cl::desc("Disable select to branch conversion."));
@ -188,10 +183,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
WorkList.insert(*II);
}
if (!DisableDeleteDeadBlocks)
for (SmallPtrSet<BasicBlock*, 8>::iterator
I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
DeleteDeadBlock(*I);
for (SmallPtrSet<BasicBlock*, 8>::iterator
I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
DeleteDeadBlock(*I);
// Merge pairs of basic blocks with unconditional branches, connected by
// a single edge.

View File

@ -613,8 +613,8 @@ namespace {
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
const BasicBlock *Root);
bool propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root);
const BasicBlockEdge &Root);
bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
};
char GVN::ID = 0;
@ -2004,22 +2004,13 @@ Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
/// use is dominated by the given basic block. Returns the number of uses that
/// were replaced.
unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
const BasicBlock *Root) {
const BasicBlockEdge &Root) {
unsigned Count = 0;
for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
UI != UE; ) {
Use &U = (UI++).getUse();
// If From occurs as a phi node operand then the use implicitly lives in the
// corresponding incoming block. Otherwise it is the block containing the
// user that must be dominated by Root.
BasicBlock *UsingBlock;
if (PHINode *PN = dyn_cast<PHINode>(U.getUser()))
UsingBlock = PN->getIncomingBlock(U);
else
UsingBlock = cast<Instruction>(U.getUser())->getParent();
if (DT->dominates(Root, UsingBlock)) {
if (DT->dominates(Root, U)) {
U.set(To);
++Count;
}
@ -2027,13 +2018,34 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
return Count;
}
/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return
/// true if every path from the entry block to 'Dst' passes via this edge. In
/// particular 'Dst' must not be reachable via another edge from 'Src'.
static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
DominatorTree *DT) {
// While in theory it is interesting to consider the case in which Dst has
// more than one predecessor, because Dst might be part of a loop which is
// only reachable from Src, in practice it is pointless since at the time
// GVN runs all such loops have preheaders, which means that Dst will have
// been changed to have only one predecessor, namely Src.
const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
const BasicBlock *Src = E.getStart();
assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
(void)Src;
return Pred != 0;
}
/// propagateEquality - The given values are known to be equal in every block
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) {
bool GVN::propagateEquality(Value *LHS, Value *RHS,
const BasicBlockEdge &Root) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
Worklist.push_back(std::make_pair(LHS, RHS));
bool Changed = false;
// For speed, compute a conservative fast approximation to
// DT->dominates(Root, Root.getEnd());
bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
while (!Worklist.empty()) {
std::pair<Value*, Value*> Item = Worklist.pop_back_val();
@ -2065,9 +2077,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) {
LVN = RVN;
}
}
assert((!isa<Instruction>(RHS) ||
DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) &&
"Instruction doesn't dominate scope!");
// If value numbering later sees that an instruction in the scope is equal
// to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
@ -2076,8 +2085,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) {
// if RHS is an instruction (if an instruction in the scope is morphed into
// LHS then it will be turned into RHS by the next GVN iteration anyway, so
// using the leader table is about compiling faster, not optimizing better).
if (!isa<Instruction>(RHS))
addToLeaderTable(LVN, RHS, Root);
// The leader table only tracks basic blocks, not edges. Only add to if we
// have the simple case where the edge dominates the end.
if (RootDominatesEnd && !isa<Instruction>(RHS))
addToLeaderTable(LVN, RHS, Root.getEnd());
// Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
// LHS always has at least one use that is not dominated by Root, this will
@ -2136,7 +2147,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) {
// If the number we were assigned was brand new then there is no point in
// looking for an instruction realizing it: there cannot be one!
if (Num < NextNum) {
Value *NotCmp = findLeader(Root, Num);
Value *NotCmp = findLeader(Root.getEnd(), Num);
if (NotCmp && isa<Instruction>(NotCmp)) {
unsigned NumReplacements =
replaceAllDominatedUsesWith(NotCmp, NotVal, Root);
@ -2146,7 +2157,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) {
}
// Ensure that any instruction in scope that gets the "A < B" value number
// is replaced with false.
addToLeaderTable(Num, NotVal, Root);
// The leader table only tracks basic blocks, not edges. Only add to if we
// have the simple case where the edge dominates the end.
if (RootDominatesEnd)
addToLeaderTable(Num, NotVal, Root.getEnd());
continue;
}
@ -2155,22 +2169,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) {
return Changed;
}
/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return
/// true if every path from the entry block to 'Dst' passes via this edge. In
/// particular 'Dst' must not be reachable via another edge from 'Src'.
static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst,
DominatorTree *DT) {
// While in theory it is interesting to consider the case in which Dst has
// more than one predecessor, because Dst might be part of a loop which is
// only reachable from Src, in practice it is pointless since at the time
// GVN runs all such loops have preheaders, which means that Dst will have
// been changed to have only one predecessor, namely Src.
BasicBlock *Pred = Dst->getSinglePredecessor();
assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
(void)Src;
return Pred != 0;
}
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I) {
@ -2210,18 +2208,20 @@ bool GVN::processInstruction(Instruction *I) {
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
// Avoid multiple edges early.
if (TrueSucc == FalseSucc)
return false;
BasicBlock *Parent = BI->getParent();
bool Changed = false;
if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT))
Changed |= propagateEquality(BranchCond,
ConstantInt::getTrue(TrueSucc->getContext()),
TrueSucc);
Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
BasicBlockEdge TrueE(Parent, TrueSucc);
Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT))
Changed |= propagateEquality(BranchCond,
ConstantInt::getFalse(FalseSucc->getContext()),
FalseSucc);
Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
BasicBlockEdge FalseE(Parent, FalseSucc);
Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
return Changed;
}
@ -2234,8 +2234,9 @@ bool GVN::processInstruction(Instruction *I) {
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
BasicBlock *Dst = i.getCaseSuccessor();
if (isOnlyReachableViaThisEdge(Parent, Dst, DT))
Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst);
BasicBlockEdge E(Parent, Dst);
if (E.isSingleEdge())
Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
}
return Changed;
}

View File

@ -39,20 +39,17 @@ static cl::opt<bool,true>
VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
cl::desc("Verify dominator info (time consuming)"));
namespace llvm {
class BasicBlockEdge {
const BasicBlock *Start;
const BasicBlock *End;
public:
BasicBlockEdge(const BasicBlock *Start_, const BasicBlock *End_) :
Start(Start_), End(End_) { }
const BasicBlock *getStart() const {
return Start;
}
const BasicBlock *getEnd() const {
return End;
}
};
bool BasicBlockEdge::isSingleEdge() const {
const TerminatorInst *TI = Start->getTerminator();
unsigned NumEdgesToEnd = 0;
for (unsigned int i = 0, n = TI->getNumSuccessors(); i < n; ++i) {
if (TI->getSuccessor(i) == End)
++NumEdgesToEnd;
if (NumEdgesToEnd >= 2)
return false;
}
assert(NumEdgesToEnd == 1);
return true;
}
//===----------------------------------------------------------------------===//

View File

@ -1093,7 +1093,7 @@ void Verifier::visitBitCastInst(BitCastInst &I) {
// BitCast implies a no-op cast of type only. No bits change.
// However, you can't cast pointers to anything but pointers.
Assert1(DestTy->isPointerTy() == DestTy->isPointerTy(),
Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(),
"Bitcast requires both operands to be pointer or neither", &I);
Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I);

View File

@ -16,7 +16,7 @@
; CHECK: for.body:
; CHECK: %inc.9 = add i8 %inc.8, 1
; CHECK: %0 = add i8 %inc1, 10
; CHEKC: br label %for.cond
; CHECK: br label %for.cond
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
define void @func() noreturn nounwind uwtable ssp {

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -disable-cgp-delete-dead-blocks -mcpu=cortex-a8 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4.
; rdar://9133587
@ -21,12 +21,6 @@ for.body: ; preds = %_Z14printIsNotZeroi
%x = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 0
%y = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 1
%inc = add i32 %i.022, 1
br i1 %tmp3, label %_Z14printIsNotZeroi.exit, label %if.then.i
if.then.i: ; preds = %for.body
unreachable
_Z14printIsNotZeroi.exit: ; preds = %for.body
%tmp8 = load i32* %x, align 4, !tbaa !0
%tmp11 = load i32* %y, align 4, !tbaa !0
%mul = mul nsw i32 %tmp11, %tmp8
@ -37,7 +31,7 @@ if.then.i16: ; preds = %_Z14printIsNotZeroi
unreachable
_Z14printIsNotZeroi.exit17: ; preds = %_Z14printIsNotZeroi.exit
br i1 undef, label %_Z14printIsNotZeroi.exit17.for.body_crit_edge, label %for.end
br label %_Z14printIsNotZeroi.exit17.for.body_crit_edge
_Z14printIsNotZeroi.exit17.for.body_crit_edge: ; preds = %_Z14printIsNotZeroi.exit17
%b.phi.trans.insert = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3

View File

@ -33,16 +33,16 @@ define void @test_cos(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}cosf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}cosf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}cosf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}cosf
; CHECK: vstmia {{.*}}
@ -64,16 +64,16 @@ define void @test_exp(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}expf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}expf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}expf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}expf
; CHECK: vstmia {{.*}}
@ -95,16 +95,16 @@ define void @test_exp2(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}exp2f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}exp2f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}exp2f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}exp2f
; CHECK: vstmia {{.*}}
@ -126,16 +126,16 @@ define void @test_log10(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log10f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log10f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log10f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log10f
; CHECK: vstmia {{.*}}
@ -157,16 +157,16 @@ define void @test_log(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}logf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}logf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}logf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}logf
; CHECK: vstmia {{.*}}
@ -188,16 +188,16 @@ define void @test_log2(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log2f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log2f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log2f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log2f
; CHECK: vstmia {{.*}}
@ -220,16 +220,16 @@ define void @test_pow(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}powf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}powf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}powf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}powf
; CHECK: vstmia {{.*}}
@ -277,16 +277,16 @@ define void @test_sin(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}sinf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}sinf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}sinf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}sinf
; CHECK: vstmia {{.*}}

View File

@ -2,6 +2,8 @@
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP
define i32 @t0(i1 zeroext %a) nounwind {
%1 = zext i1 %a to i32
@ -221,3 +223,67 @@ entry:
}
declare i32 @CallVariadic(i32, ...)
; Test fastcc
define fastcc void @fast_callee(float %i) ssp {
entry:
; ARM: fast_callee
; ARM: vmov r0, s0
; THUMB: fast_callee
; THUMB: vmov r0, s0
; ARM-NOVFP: fast_callee
; ARM-NOVFP-NOT: s0
; THUMB-NOVFP: fast_callee
; THUMB-NOVFP-NOT: s0
call void @print(float %i)
ret void
}
define void @fast_caller() ssp {
entry:
; ARM: fast_caller
; ARM: vldr s0,
; THUMB: fast_caller
; THUMB: vldr s0,
; ARM-NOVFP: fast_caller
; ARM-NOVFP: movw r0, #13107
; ARM-NOVFP: movt r0, #16611
; THUMB-NOVFP: fast_caller
; THUMB-NOVFP: movw r0, #13107
; THUMB-NOVFP: movt r0, #16611
call fastcc void @fast_callee(float 0x401C666660000000)
ret void
}
define void @no_fast_callee(float %i) ssp {
entry:
; ARM: no_fast_callee
; ARM: vmov s0, r0
; THUMB: no_fast_callee
; THUMB: vmov s0, r0
; ARM-NOVFP: no_fast_callee
; ARM-NOVFP-NOT: s0
; THUMB-NOVFP: no_fast_callee
; THUMB-NOVFP-NOT: s0
call void @print(float %i)
ret void
}
define void @no_fast_caller() ssp {
entry:
; ARM: no_fast_caller
; ARM: vmov r0, s0
; THUMB: no_fast_caller
; THUMB: vmov r0, s0
; ARM-NOVFP: no_fast_caller
; ARM-NOVFP: movw r0, #13107
; ARM-NOVFP: movt r0, #16611
; THUMB-NOVFP: no_fast_caller
; THUMB-NOVFP: movw r0, #13107
; THUMB-NOVFP: movt r0, #16611
call void @no_fast_callee(float 0x401C666660000000)
ret void
}
declare void @print(float)

View File

@ -15,14 +15,14 @@ entry:
%1 = load i16* @y, align 2
%2 = tail call float @llvm.convert.from.fp16(i16 %0)
; CHECK: __gnu_h2f_ieee
; CHECK-FP16: vcvtb.f16.f32
; CHECK-FP16: vcvtb.f32.f16
%3 = tail call float @llvm.convert.from.fp16(i16 %1)
; CHECK: __gnu_h2f_ieee
; CHECK-FP16: vcvtb.f16.f32
; CHECK-FP16: vcvtb.f32.f16
%4 = fadd float %2, %3
%5 = tail call i16 @llvm.convert.to.fp16(float %4)
; CHECK: __gnu_f2h_ieee
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f16.f32
store i16 %5, i16* @x, align 2
ret void
}

View File

@ -76,12 +76,11 @@ define double @f7(double %a, double %b) {
; block generated, odds are good that we have close to the ideal code for this:
;
; CHECK-NEON: _f8:
; CHECK-NEON: movw [[R3:r[0-9]+]], #1123
; CHECK-NEON: adr [[R2:r[0-9]+]], LCPI7_0
; CHECK-NEON-NEXT: movw [[R3:r[0-9]+]], #1123
; CHECK-NEON-NEXT: adds {{r.*}}, [[R2]], #4
; CHECK-NEON-NEXT: cmp r0, [[R3]]
; CHECK-NEON-NEXT: it ne
; CHECK-NEON-NEXT: movne {{r.*}}, [[R2]]
; CHECK-NEON-NEXT: it eq
; CHECK-NEON-NEXT: addeq.w {{r.*}}, [[R2]]
; CHECK-NEON-NEXT: ldr
; CHECK-NEON: bx

View File

@ -4,13 +4,13 @@
define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
; ARM: t1:
; ARM: sub r0, r1, #-2147483647
; ARM: movgt r0, r1
; ARM: suble r1, r1, #-2147483647
; ARM: mov r0, r1
; T2: t1:
; T2: mvn r0, #-2147483648
; T2: add r0, r1
; T2: movgt r0, r1
; T2: addle.w r1, r1
; T2: mov r0, r1
%tmp1 = icmp sgt i32 %c, 10
%tmp2 = select i1 %tmp1, i32 0, i32 2147483647
%tmp3 = add i32 %tmp2, %b
@ -19,12 +19,12 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; ARM: t2:
; ARM: sub r0, r1, #10
; ARM: movgt r0, r1
; ARM: suble r1, r1, #10
; ARM: mov r0, r1
; T2: t2:
; T2: sub.w r0, r1, #10
; T2: movgt r0, r1
; T2: suble.w r1, r1, #10
; T2: mov r0, r1
%tmp1 = icmp sgt i32 %c, 10
%tmp2 = select i1 %tmp1, i32 0, i32 10
%tmp3 = sub i32 %b, %tmp2
@ -104,3 +104,78 @@ entry:
ret i32 %tmp3
}
; Fold ORRri into movcc.
define i32 @t8(i32 %a, i32 %b) nounwind {
; ARM: t8:
; ARM: cmp r0, r1
; ARM: orrge r0, r1, #1
; T2: t8:
; T2: cmp r0, r1
; T2: orrge r0, r1, #1
%x = or i32 %b, 1
%cond = icmp slt i32 %a, %b
%tmp1 = select i1 %cond, i32 %a, i32 %x
ret i32 %tmp1
}
; Fold ANDrr into movcc.
define i32 @t9(i32 %a, i32 %b, i32 %c) nounwind {
; ARM: t9:
; ARM: cmp r0, r1
; ARM: andge r0, r1, r2
; T2: t9:
; T2: cmp r0, r1
; T2: andge.w r0, r1, r2
%x = and i32 %b, %c
%cond = icmp slt i32 %a, %b
%tmp1 = select i1 %cond, i32 %a, i32 %x
ret i32 %tmp1
}
; Fold EORrs into movcc.
define i32 @t10(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; ARM: t10:
; ARM: cmp r0, r1
; ARM: eorge r0, r1, r2, lsl #7
; T2: t10:
; T2: cmp r0, r1
; T2: eorge.w r0, r1, r2, lsl #7
%s = shl i32 %c, 7
%x = xor i32 %b, %s
%cond = icmp slt i32 %a, %b
%tmp1 = select i1 %cond, i32 %a, i32 %x
ret i32 %tmp1
}
; Fold ORRri into movcc, reversing the condition.
define i32 @t11(i32 %a, i32 %b) nounwind {
; ARM: t11:
; ARM: cmp r0, r1
; ARM: orrlt r0, r1, #1
; T2: t11:
; T2: cmp r0, r1
; T2: orrlt r0, r1, #1
%x = or i32 %b, 1
%cond = icmp slt i32 %a, %b
%tmp1 = select i1 %cond, i32 %x, i32 %a
ret i32 %tmp1
}
; Fold ADDri12 into movcc
define i32 @t12(i32 %a, i32 %b) nounwind {
; ARM: t12:
; ARM: cmp r0, r1
; ARM: addge r0, r1,
; T2: t12:
; T2: cmp r0, r1
; T2: addwge r0, r1, #3000
%x = add i32 %b, 3000
%cond = icmp slt i32 %a, %b
%tmp1 = select i1 %cond, i32 %a, i32 %x
ret i32 %tmp1
}

View File

@ -1,25 +1,25 @@
; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=GENERIC
; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
; RUN: llc < %s -mtriple=armv6-apple-darwin -arm-strict-align | FileCheck %s -check-prefix=GENERIC
; RUN: llc < %s -mtriple=armv6-linux | FileCheck %s -check-prefix=GENERIC
; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED
; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -arm-strict-align -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED
; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=UNALIGNED
; rdar://7113725
; rdar://12091029
define void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
; GENERIC: t:
; GENERIC: ldrb [[R2:r[0-9]+]]
; GENERIC: ldrb [[R3:r[0-9]+]]
; GENERIC: ldrb [[R12:r[0-9]+]]
; GENERIC: ldrb [[R1:r[0-9]+]]
; GENERIC: strb [[R1]]
; GENERIC: strb [[R12]]
; GENERIC: strb [[R3]]
; GENERIC: strb [[R2]]
; EXPANDED: t:
; EXPANDED: ldrb [[R2:r[0-9]+]]
; EXPANDED: ldrb [[R3:r[0-9]+]]
; EXPANDED: ldrb [[R12:r[0-9]+]]
; EXPANDED: ldrb [[R1:r[0-9]+]]
; EXPANDED: strb [[R1]]
; EXPANDED: strb [[R12]]
; EXPANDED: strb [[R3]]
; EXPANDED: strb [[R2]]
; DARWIN_V6: t:
; DARWIN_V6: ldr r1
; DARWIN_V6: str r1
; UNALIGNED: t:
; UNALIGNED: ldr r1
; UNALIGNED: str r1
%__src1.i = bitcast i8* %b to i32* ; <i32*> [#uses=1]
%__dest2.i = bitcast i8* %a to i32* ; <i32*> [#uses=1]
@ -27,3 +27,35 @@ entry:
store i32 %tmp.i, i32* %__dest2.i, align 1
ret void
}
define void @hword(double* %a, double* %b) nounwind {
entry:
; EXPANDED: hword:
; EXPANDED-NOT: vld1
; EXPANDED: ldrh
; EXPANDED-NOT: str1
; EXPANDED: strh
; UNALIGNED: hword:
; UNALIGNED: vld1.16
; UNALIGNED: vst1.16
%tmp = load double* %a, align 2
store double %tmp, double* %b, align 2
ret void
}
define void @byte(double* %a, double* %b) nounwind {
entry:
; EXPANDED: byte:
; EXPANDED-NOT: vld1
; EXPANDED: ldrb
; EXPANDED-NOT: str1
; EXPANDED: strb
; UNALIGNED: byte:
; UNALIGNED: vld1.8
; UNALIGNED: vst1.8
%tmp = load double* %a, align 1
store double %tmp, double* %b, align 1
ret void
}

View File

@ -7,7 +7,7 @@ declare void @llvm.donothing() readnone
; CHECK: f1
define void @f1() nounwind uwtable ssp {
entry:
; CHECK-NOT donothing
; CHECK-NOT: donothing
invoke void @llvm.donothing()
to label %invoke.cont unwind label %lpad
@ -25,7 +25,7 @@ lpad:
; CHECK: f2
define void @f2() nounwind {
entry:
; CHECK-NOT donothing
; CHECK-NOT: donothing
call void @llvm.donothing()
ret void
}

View File

@ -0,0 +1,12 @@
; RUN: llc -march=mipsel -mattr=+android < %s | FileCheck %s
define <4 x float> @retvec4() nounwind readnone {
entry:
; CHECK: lwc1 $f0
; CHECK: lwc1 $f2
; CHECK: lwc1 $f1
; CHECK: lwc1 $f3
ret <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
}

View File

@ -95,7 +95,7 @@ bb.nph:
bb: ; preds = %bb, %bb.nph
; CHECK: bb
; CHECK: eor.w
; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
; CHECK: eorne.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
; CHECK-NOT: eor
; CHECK: and
%data_addr.013 = phi i8 [ %data, %bb.nph ], [ %8, %bb ] ; <i8> [#uses=2]

View File

@ -4,9 +4,9 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK: t1
; CHECK: mvn r0, #-2147483648
; CHECK: cmp r2, #10
; CHECK: add r0, r1
; CHECK: it gt
; CHECK: movgt r0, r1
; CHECK: it le
; CHECK: addle.w r1, r1, r0
; CHECK: mov r0, r1
%tmp1 = icmp sgt i32 %c, 10
%tmp2 = select i1 %tmp1, i32 0, i32 2147483647
%tmp3 = add i32 %tmp2, %b
@ -15,10 +15,10 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK: t2
; CHECK: add.w r0, r1, #-2147483648
; CHECK: cmp r2, #10
; CHECK: it gt
; CHECK: movgt r0, r1
; CHECK: it le
; CHECK: addle.w r1, r1, #-2147483648
; CHECK: mov r0, r1
%tmp1 = icmp sgt i32 %c, 10
%tmp2 = select i1 %tmp1, i32 0, i32 2147483648
@ -28,10 +28,10 @@ define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind {
define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; CHECK: t3
; CHECK: sub.w r0, r1, #10
; CHECK: cmp r2, #10
; CHECK: it gt
; CHECK: movgt r0, r1
; CHECK: it le
; CHECK: suble.w r1, r1, #10
; CHECK: mov r0, r1
%tmp1 = icmp sgt i32 %c, 10
%tmp2 = select i1 %tmp1, i32 0, i32 10
%tmp3 = sub i32 %b, %tmp2

View File

@ -3,7 +3,7 @@
; PR5329
@llvm.global_ctors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @construct_2 }, { i32, void ()* } { i32 3000, void ()* @construct_3 }, { i32, void ()* } { i32 1000, void ()* @construct_1 }]
; CHECK-DEFAULT .section .ctors.64535,"aw",@progbits
; CHECK-DEFAULT: .section .ctors.64535,"aw",@progbits
; CHECK-DEFAULT: .long construct_1
; CHECK-DEFAULT: .section .ctors.63535,"aw",@progbits
; CHECK-DEFAULT: .long construct_2

View File

@ -0,0 +1,32 @@
; RUN: llc < %s | FileCheck %s
; Check that an overly large immediate created by SROA doesn't crash the
; legalizer.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
%struct._GtkSheetRow = type { i32*, i32, i32, i32, %struct._GtkSheetButton, i32, i32 }
%struct._GtkSheetButton = type { i32, i32*, i32, i32*, i32 }
@a = common global %struct._GtkSheetRow* null, align 8
define void @fn1() nounwind uwtable ssp {
entry:
%0 = load %struct._GtkSheetRow** @a, align 8
%1 = bitcast %struct._GtkSheetRow* %0 to i576*
%srcval2 = load i576* %1, align 8
%tobool = icmp ugt i576 %srcval2, 57586096570152913699974892898380567793532123114264532903689671329431521032595044740083720782129802971518987656109067457577065805510327036019308994315074097345724415
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
store i576 %srcval2, i576* %1, align 8
br label %if.end
if.end: ; preds = %if.then, %entry
ret void
; CHECK: fn1:
; CHECK: shrq $32, [[REG:%.*]]
; CHECK: testq [[REG]], [[REG]]
; CHECK: je
}

View File

@ -57,6 +57,6 @@ entry:
; CHECK: subl $28
; CHECK: leal (%esp), %ecx
; CHECK: calll _test4fastccsret
; CHECK addl $28
; CHECK: addl $28
}
declare fastcc void @test4fastccsret(%struct.a* sret)

View File

@ -10,7 +10,7 @@ define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C,
; CHECK: foo
; CHECK: addl
; CHECK: addl
; CEHCK: addl
; CHECK: addl
entry:
%0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]

View File

@ -65,18 +65,18 @@ entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
ret void
; LINUX: test4:
; LINUX movq
; LINUX movq
; LINUX movq
; LINUX movq
; LINUX movq
; LINUX movq
; LINUX movq
; LINUX movq
; LINUX movq
; LINUX movq
; LINUX movq
; LINUX movq
; LINUX: movq
; LINUX: movq
; LINUX: movq
; LINUX: movq
; LINUX: movq
; LINUX: movq
; LINUX: movq
; LINUX: movq
; LINUX: movq
; LINUX: movq
; LINUX: movq
; LINUX: movq
}

View File

@ -0,0 +1,56 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
define <2 x double> @v2f2d_ext_vec(<2 x float> %v1) nounwind {
entry:
; CHECK: v2f2d_ext_vec
; CHECK: cvtps2pd
; AVX: v2f2d_ext_vec
; AVX: vcvtps2pd
%f1 = fpext <2 x float> %v1 to <2 x double>
ret <2 x double> %f1
}
define <3 x double> @v3f2d_ext_vec(<3 x float> %v1) nounwind {
entry:
; CHECK: v3f2d_ext_vec
; CHECK: cvtps2pd
; CHECK: movhlps
; CHECK: cvtps2pd
; AVX: v3f2d_ext_vec
; AVX: vcvtps2pd
; AVX: ret
%f1 = fpext <3 x float> %v1 to <3 x double>
ret <3 x double> %f1
}
define <4 x double> @v4f2d_ext_vec(<4 x float> %v1) nounwind {
entry:
; CHECK: v4f2d_ext_vec
; CHECK: cvtps2pd
; CHECK: movhlps
; CHECK: cvtps2pd
; AVX: v4f2d_ext_vec
; AVX: vcvtps2pd
; AVX: ret
%f1 = fpext <4 x float> %v1 to <4 x double>
ret <4 x double> %f1
}
define <8 x double> @v8f2d_ext_vec(<8 x float> %v1) nounwind {
entry:
; CHECK: v8f2d_ext_vec
; CHECK: cvtps2pd
; CHECK: cvtps2pd
; CHECK: movhlps
; CHECK: cvtps2pd
; CHECK: movhlps
; CHECK: cvtps2pd
; AVX: v8f2d_ext_vec
; AVX: vcvtps2pd
; AVX: vextractf128
; AVX: vcvtps2pd
; AVX: ret
%f1 = fpext <8 x float> %v1 to <8 x double>
ret <8 x double> %f1
}

View File

@ -1,19 +0,0 @@
; RUN: llc < %s -disable-cgp-delete-dead-blocks | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
define void @test5() nounwind optsize noinline ssp {
entry:
; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
%buf = alloca [64 x i8], align 16
%0 = call i64 @llvm.objectsize.i64(i8* undef, i1 false)
br i1 false, label %if.end, label %if.then
if.then: ; preds = %entry
unreachable
if.end: ; preds = %entry
ret void
}

View File

@ -23,15 +23,14 @@ define i32 @test_load(i32* %a) address_safety {
; CHECK: icmp sge i8 %{{.*}}, %[[LOAD_SHADOW]]
; CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
;
; The actual load comes next because ASan adds the crash block
; to the end of the function.
; CHECK: %tmp1 = load i32* %a
; CHECK: ret i32 %tmp1
; The crash block reports the error.
; CHECK: call void @__asan_report_load4(i64 %[[LOAD_ADDR]])
; CHECK: unreachable
;
; The actual load.
; CHECK: %tmp1 = load i32* %a
; CHECK: ret i32 %tmp1
entry:
@ -57,15 +56,14 @@ define void @test_store(i32* %a) address_safety {
; CHECK: icmp sge i8 %{{.*}}, %[[STORE_SHADOW]]
; CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
;
; The actual load comes next because ASan adds the crash block
; to the end of the function.
; CHECK: store i32 42, i32* %a
; CHECK: ret void
;
; The crash block reports the error.
; CHECK: call void @__asan_report_store4(i64 %[[STORE_ADDR]])
; CHECK: unreachable
;
; The actual load.
; CHECK: store i32 42, i32* %a
; CHECK: ret void
;
entry:
store i32 42, i32* %a

View File

@ -0,0 +1,60 @@
; RUN: opt %s -gvn -S -o - | FileCheck %s
define i32 @f1(i32 %x) {
; CHECK: define i32 @f1(
bb0:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %bb2, label %bb1
bb1:
br label %bb2
bb2:
%cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ]
%foo = add i32 %cond, %x
ret i32 %foo
; CHECK: bb2:
; CHECK: ret i32 %x
}
define i32 @f2(i32 %x) {
; CHECK: define i32 @f2(
bb0:
%cmp = icmp ne i32 %x, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
br label %bb2
bb2:
%cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ]
%foo = add i32 %cond, %x
ret i32 %foo
; CHECK: bb2:
; CHECK: ret i32 %x
}
define i32 @f3(i32 %x) {
; CHECK: define i32 @f3(
bb0:
switch i32 %x, label %bb1 [ i32 0, label %bb2]
bb1:
br label %bb2
bb2:
%cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ]
%foo = add i32 %cond, %x
ret i32 %foo
; CHECK: bb2:
; CHECK: ret i32 %x
}
declare void @g(i1)
define void @f4(i8 * %x) {
; CHECK: define void @f4(
bb0:
%y = icmp eq i8* null, %x
br i1 %y, label %bb2, label %bb1
bb1:
br label %bb2
bb2:
%zed = icmp eq i8* null, %x
call void @g(i1 %zed)
; CHECK: call void @g(i1 %y)
ret void
}

View File

@ -620,7 +620,7 @@ entry:
; CHECK-NOT: load
; CHECK: load i16*
; CHECK-NOT: load
; CHECK-ret i32
; CHECK: ret i32
}
define i32 @test_widening2() nounwind ssp noredzone {
@ -644,7 +644,7 @@ entry:
; CHECK-NOT: load
; CHECK: load i32*
; CHECK-NOT: load
; CHECK-ret i32
; CHECK: ret i32
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind

View File

@ -33,7 +33,6 @@ define void @outer2(i32 %N) {
;
; CHECK: @outer2
; CHECK-NOT: call void @inner2
; CHECK alloca i32, i32 %N
; CHECK-NOT: call void @inner2
; CHECK: ret void

View File

@ -1,6 +1,7 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
define void @test1(i8* %a) {
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 false)
@ -17,3 +18,10 @@ define void @test2(i8* %a) {
; CHECK: define void @test2
; CHECK-NEXT: call void @llvm.memcpy
}
define void @test3(i8* %d, i8* %s) {
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 17179869184, i32 4, i1 false)
ret void
; CHECK: define void @test3
; CHECK-NEXT: call void @llvm.memcpy
}

View File

@ -238,3 +238,20 @@ xpto:
return:
ret i32 42
}
; CHECK: @PR13621
define i32 @PR13621(i1 %bool) nounwind {
entry:
%cond = or i1 %bool, true
br i1 %cond, label %return, label %xpto
; technically reachable, but this malformed IR may appear as a result of constant propagation
xpto:
%gep = getelementptr i8* %gep, i32 1
%o = call i32 @llvm.objectsize.i32(i8* %gep, i1 true)
; CHECK: ret i32 undef
ret i32 %o
return:
ret i32 7
}

View File

@ -1272,7 +1272,7 @@ g:
; Delete retain,release pairs around loops.
; CHECK: define void @test39(
; CHECK_NOT: @objc_
; CHECK-NOT: @objc_
; CHECK: }
define void @test39(i8* %p) {
entry:
@ -1290,7 +1290,7 @@ exit: ; preds = %loop
; Delete retain,release pairs around loops containing uses.
; CHECK: define void @test39b(
; CHECK_NOT: @objc_
; CHECK-NOT: @objc_
; CHECK: }
define void @test39b(i8* %p) {
entry:
@ -1309,7 +1309,7 @@ exit: ; preds = %loop
; Delete retain,release pairs around loops containing potential decrements.
; CHECK: define void @test39c(
; CHECK_NOT: @objc_
; CHECK-NOT: @objc_
; CHECK: }
define void @test39c(i8* %p) {
entry:
@ -1329,7 +1329,7 @@ exit: ; preds = %loop
; the successors are in a different order.
; CHECK: define void @test40(
; CHECK_NOT: @objc_
; CHECK-NOT: @objc_
; CHECK: }
define void @test40(i8* %p) {
entry:

View File

@ -76,12 +76,12 @@ done:
; CHECK: define void @test2() {
; CHECK: invoke.cont:
; CHECK-NEXT: call i8* @objc_retain
; CHEK-NOT: @objc
; CHECK-NOT: @objc_r
; CHECK: finally.cont:
; CHECK-NEXT: call void @objc_release
; CHEK-NOT: @objc
; CHECK-NOT: @objc
; CHECK: finally.rethrow:
; CHEK-NOT: @objc
; CHECK-NOT: @objc
; CHECK: }
define void @test2() {
entry:

View File

@ -648,6 +648,49 @@ TEST(APFloatTest, exactInverse) {
EXPECT_FALSE(APFloat(1.40129846e-45f).getExactInverse(0));
}
TEST(APFloatTest, roundToIntegral) {
APFloat T(-0.5), S(3.14), R(APFloat::getLargest(APFloat::IEEEdouble)), P(0.0);
P = T;
P.roundToIntegral(APFloat::rmTowardZero);
EXPECT_EQ(-0.0, P.convertToDouble());
P = T;
P.roundToIntegral(APFloat::rmTowardNegative);
EXPECT_EQ(-1.0, P.convertToDouble());
P = T;
P.roundToIntegral(APFloat::rmTowardPositive);
EXPECT_EQ(-0.0, P.convertToDouble());
P = T;
P.roundToIntegral(APFloat::rmNearestTiesToEven);
EXPECT_EQ(-0.0, P.convertToDouble());
P = S;
P.roundToIntegral(APFloat::rmTowardZero);
EXPECT_EQ(3.0, P.convertToDouble());
P = S;
P.roundToIntegral(APFloat::rmTowardNegative);
EXPECT_EQ(3.0, P.convertToDouble());
P = S;
P.roundToIntegral(APFloat::rmTowardPositive);
EXPECT_EQ(4.0, P.convertToDouble());
P = S;
P.roundToIntegral(APFloat::rmNearestTiesToEven);
EXPECT_EQ(3.0, P.convertToDouble());
P = R;
P.roundToIntegral(APFloat::rmTowardZero);
EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
P = R;
P.roundToIntegral(APFloat::rmTowardNegative);
EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
P = R;
P.roundToIntegral(APFloat::rmTowardPositive);
EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
P = R;
P.roundToIntegral(APFloat::rmNearestTiesToEven);
EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
}
TEST(APFloatTest, getLargest) {
EXPECT_EQ(3.402823466e+38f, APFloat::getLargest(APFloat::IEEEsingle).convertToFloat());
EXPECT_EQ(1.7976931348623158e+308, APFloat::getLargest(APFloat::IEEEdouble).convertToDouble());

View File

@ -178,150 +178,150 @@ TEST(AlignOfTest, BasicAlignedArray) {
// types because of the bugs mentioned above where GCC and Clang both
// disregard the arbitrary alignment specifier until the type is used to
// declare a member of a struct.
EXPECT_LE(1u, alignOf<AlignedCharArray<SA1>::union_type>());
EXPECT_LE(2u, alignOf<AlignedCharArray<SA2>::union_type>());
EXPECT_LE(4u, alignOf<AlignedCharArray<SA4>::union_type>());
EXPECT_LE(8u, alignOf<AlignedCharArray<SA8>::union_type>());
EXPECT_LE(1u, alignOf<AlignedCharArrayUnion<SA1> >());
EXPECT_LE(2u, alignOf<AlignedCharArrayUnion<SA2> >());
EXPECT_LE(4u, alignOf<AlignedCharArrayUnion<SA4> >());
EXPECT_LE(8u, alignOf<AlignedCharArrayUnion<SA8> >());
EXPECT_LE(1u, sizeof(AlignedCharArray<SA1>::union_type));
EXPECT_LE(2u, sizeof(AlignedCharArray<SA2>::union_type));
EXPECT_LE(4u, sizeof(AlignedCharArray<SA4>::union_type));
EXPECT_LE(8u, sizeof(AlignedCharArray<SA8>::union_type));
EXPECT_LE(1u, sizeof(AlignedCharArrayUnion<SA1>));
EXPECT_LE(2u, sizeof(AlignedCharArrayUnion<SA2>));
EXPECT_LE(4u, sizeof(AlignedCharArrayUnion<SA4>));
EXPECT_LE(8u, sizeof(AlignedCharArrayUnion<SA8>));
EXPECT_EQ(1u, (alignOf<AlignedCharArray<SA1>::union_type>()));
EXPECT_EQ(2u, (alignOf<AlignedCharArray<SA1, SA2>::union_type>()));
EXPECT_EQ(4u, (alignOf<AlignedCharArray<SA1, SA2, SA4>::union_type>()));
EXPECT_EQ(8u, (alignOf<AlignedCharArray<SA1, SA2, SA4, SA8>::union_type>()));
EXPECT_EQ(1u, (alignOf<AlignedCharArrayUnion<SA1> >()));
EXPECT_EQ(2u, (alignOf<AlignedCharArrayUnion<SA1, SA2> >()));
EXPECT_EQ(4u, (alignOf<AlignedCharArrayUnion<SA1, SA2, SA4> >()));
EXPECT_EQ(8u, (alignOf<AlignedCharArrayUnion<SA1, SA2, SA4, SA8> >()));
EXPECT_EQ(1u, sizeof(AlignedCharArray<SA1>::union_type));
EXPECT_EQ(2u, sizeof(AlignedCharArray<SA1, SA2>::union_type));
EXPECT_EQ(4u, sizeof(AlignedCharArray<SA1, SA2, SA4>::union_type));
EXPECT_EQ(8u, sizeof(AlignedCharArray<SA1, SA2, SA4, SA8>::union_type));
EXPECT_EQ(1u, sizeof(AlignedCharArrayUnion<SA1>));
EXPECT_EQ(2u, sizeof(AlignedCharArrayUnion<SA1, SA2>));
EXPECT_EQ(4u, sizeof(AlignedCharArrayUnion<SA1, SA2, SA4>));
EXPECT_EQ(8u, sizeof(AlignedCharArrayUnion<SA1, SA2, SA4, SA8>));
EXPECT_EQ(1u, (alignOf<AlignedCharArray<SA1[1]>::union_type>()));
EXPECT_EQ(2u, (alignOf<AlignedCharArray<SA1[2], SA2[1]>::union_type>()));
EXPECT_EQ(4u, (alignOf<AlignedCharArray<SA1[42], SA2[55],
SA4[13]>::union_type>()));
EXPECT_EQ(8u, (alignOf<AlignedCharArray<SA1[2], SA2[1],
SA4, SA8>::union_type>()));
EXPECT_EQ(1u, (alignOf<AlignedCharArrayUnion<SA1[1]> >()));
EXPECT_EQ(2u, (alignOf<AlignedCharArrayUnion<SA1[2], SA2[1]> >()));
EXPECT_EQ(4u, (alignOf<AlignedCharArrayUnion<SA1[42], SA2[55],
SA4[13]> >()));
EXPECT_EQ(8u, (alignOf<AlignedCharArrayUnion<SA1[2], SA2[1],
SA4, SA8> >()));
EXPECT_EQ(1u, sizeof(AlignedCharArray<SA1[1]>::union_type));
EXPECT_EQ(2u, sizeof(AlignedCharArray<SA1[2], SA2[1]>::union_type));
EXPECT_EQ(4u, sizeof(AlignedCharArray<SA1[3], SA2[2], SA4>::union_type));
EXPECT_EQ(16u, sizeof(AlignedCharArray<SA1, SA2[3],
SA4[3], SA8>::union_type));
EXPECT_EQ(1u, sizeof(AlignedCharArrayUnion<SA1[1]>));
EXPECT_EQ(2u, sizeof(AlignedCharArrayUnion<SA1[2], SA2[1]>));
EXPECT_EQ(4u, sizeof(AlignedCharArrayUnion<SA1[3], SA2[2], SA4>));
EXPECT_EQ(16u, sizeof(AlignedCharArrayUnion<SA1, SA2[3],
SA4[3], SA8>));
// For other tests we simply assert that the alignment of the union matches
// that of the fundamental type and hope that we don't have any weird type
// productions that would trigger bugs.
EXPECT_EQ(alignOf<char>(), alignOf<AlignedCharArray<char>::union_type>());
EXPECT_EQ(alignOf<short>(), alignOf<AlignedCharArray<short>::union_type>());
EXPECT_EQ(alignOf<int>(), alignOf<AlignedCharArray<int>::union_type>());
EXPECT_EQ(alignOf<long>(), alignOf<AlignedCharArray<long>::union_type>());
EXPECT_EQ(alignOf<char>(), alignOf<AlignedCharArrayUnion<char> >());
EXPECT_EQ(alignOf<short>(), alignOf<AlignedCharArrayUnion<short> >());
EXPECT_EQ(alignOf<int>(), alignOf<AlignedCharArrayUnion<int> >());
EXPECT_EQ(alignOf<long>(), alignOf<AlignedCharArrayUnion<long> >());
EXPECT_EQ(alignOf<long long>(),
alignOf<AlignedCharArray<long long>::union_type>());
EXPECT_EQ(alignOf<float>(), alignOf<AlignedCharArray<float>::union_type>());
EXPECT_EQ(alignOf<double>(), alignOf<AlignedCharArray<double>::union_type>());
alignOf<AlignedCharArrayUnion<long long> >());
EXPECT_EQ(alignOf<float>(), alignOf<AlignedCharArrayUnion<float> >());
EXPECT_EQ(alignOf<double>(), alignOf<AlignedCharArrayUnion<double> >());
EXPECT_EQ(alignOf<long double>(),
alignOf<AlignedCharArray<long double>::union_type>());
EXPECT_EQ(alignOf<void *>(), alignOf<AlignedCharArray<void *>::union_type>());
EXPECT_EQ(alignOf<int *>(), alignOf<AlignedCharArray<int *>::union_type>());
alignOf<AlignedCharArrayUnion<long double> >());
EXPECT_EQ(alignOf<void *>(), alignOf<AlignedCharArrayUnion<void *> >());
EXPECT_EQ(alignOf<int *>(), alignOf<AlignedCharArrayUnion<int *> >());
EXPECT_EQ(alignOf<double (*)(double)>(),
alignOf<AlignedCharArray<double (*)(double)>::union_type>());
alignOf<AlignedCharArrayUnion<double (*)(double)> >());
EXPECT_EQ(alignOf<double (S6::*)()>(),
alignOf<AlignedCharArray<double (S6::*)()>::union_type>());
EXPECT_EQ(alignOf<S1>(), alignOf<AlignedCharArray<S1>::union_type>());
EXPECT_EQ(alignOf<S2>(), alignOf<AlignedCharArray<S2>::union_type>());
EXPECT_EQ(alignOf<S3>(), alignOf<AlignedCharArray<S3>::union_type>());
EXPECT_EQ(alignOf<S4>(), alignOf<AlignedCharArray<S4>::union_type>());
EXPECT_EQ(alignOf<S5>(), alignOf<AlignedCharArray<S5>::union_type>());
EXPECT_EQ(alignOf<S6>(), alignOf<AlignedCharArray<S6>::union_type>());
EXPECT_EQ(alignOf<D1>(), alignOf<AlignedCharArray<D1>::union_type>());
EXPECT_EQ(alignOf<D2>(), alignOf<AlignedCharArray<D2>::union_type>());
EXPECT_EQ(alignOf<D3>(), alignOf<AlignedCharArray<D3>::union_type>());
EXPECT_EQ(alignOf<D4>(), alignOf<AlignedCharArray<D4>::union_type>());
EXPECT_EQ(alignOf<D5>(), alignOf<AlignedCharArray<D5>::union_type>());
EXPECT_EQ(alignOf<D6>(), alignOf<AlignedCharArray<D6>::union_type>());
EXPECT_EQ(alignOf<D7>(), alignOf<AlignedCharArray<D7>::union_type>());
EXPECT_EQ(alignOf<D8>(), alignOf<AlignedCharArray<D8>::union_type>());
EXPECT_EQ(alignOf<D9>(), alignOf<AlignedCharArray<D9>::union_type>());
EXPECT_EQ(alignOf<V1>(), alignOf<AlignedCharArray<V1>::union_type>());
EXPECT_EQ(alignOf<V2>(), alignOf<AlignedCharArray<V2>::union_type>());
EXPECT_EQ(alignOf<V3>(), alignOf<AlignedCharArray<V3>::union_type>());
EXPECT_EQ(alignOf<V4>(), alignOf<AlignedCharArray<V4>::union_type>());
EXPECT_EQ(alignOf<V5>(), alignOf<AlignedCharArray<V5>::union_type>());
EXPECT_EQ(alignOf<V6>(), alignOf<AlignedCharArray<V6>::union_type>());
EXPECT_EQ(alignOf<V7>(), alignOf<AlignedCharArray<V7>::union_type>());
alignOf<AlignedCharArrayUnion<double (S6::*)()> >());
EXPECT_EQ(alignOf<S1>(), alignOf<AlignedCharArrayUnion<S1> >());
EXPECT_EQ(alignOf<S2>(), alignOf<AlignedCharArrayUnion<S2> >());
EXPECT_EQ(alignOf<S3>(), alignOf<AlignedCharArrayUnion<S3> >());
EXPECT_EQ(alignOf<S4>(), alignOf<AlignedCharArrayUnion<S4> >());
EXPECT_EQ(alignOf<S5>(), alignOf<AlignedCharArrayUnion<S5> >());
EXPECT_EQ(alignOf<S6>(), alignOf<AlignedCharArrayUnion<S6> >());
EXPECT_EQ(alignOf<D1>(), alignOf<AlignedCharArrayUnion<D1> >());
EXPECT_EQ(alignOf<D2>(), alignOf<AlignedCharArrayUnion<D2> >());
EXPECT_EQ(alignOf<D3>(), alignOf<AlignedCharArrayUnion<D3> >());
EXPECT_EQ(alignOf<D4>(), alignOf<AlignedCharArrayUnion<D4> >());
EXPECT_EQ(alignOf<D5>(), alignOf<AlignedCharArrayUnion<D5> >());
EXPECT_EQ(alignOf<D6>(), alignOf<AlignedCharArrayUnion<D6> >());
EXPECT_EQ(alignOf<D7>(), alignOf<AlignedCharArrayUnion<D7> >());
EXPECT_EQ(alignOf<D8>(), alignOf<AlignedCharArrayUnion<D8> >());
EXPECT_EQ(alignOf<D9>(), alignOf<AlignedCharArrayUnion<D9> >());
EXPECT_EQ(alignOf<V1>(), alignOf<AlignedCharArrayUnion<V1> >());
EXPECT_EQ(alignOf<V2>(), alignOf<AlignedCharArrayUnion<V2> >());
EXPECT_EQ(alignOf<V3>(), alignOf<AlignedCharArrayUnion<V3> >());
EXPECT_EQ(alignOf<V4>(), alignOf<AlignedCharArrayUnion<V4> >());
EXPECT_EQ(alignOf<V5>(), alignOf<AlignedCharArrayUnion<V5> >());
EXPECT_EQ(alignOf<V6>(), alignOf<AlignedCharArrayUnion<V6> >());
EXPECT_EQ(alignOf<V7>(), alignOf<AlignedCharArrayUnion<V7> >());
// Some versions of MSVC get this wrong somewhat disturbingly. The failure
// appears to be benign: alignOf<V8>() produces a preposterous value: 12
#ifndef _MSC_VER
EXPECT_EQ(alignOf<V8>(), alignOf<AlignedCharArray<V8>::union_type>());
EXPECT_EQ(alignOf<V8>(), alignOf<AlignedCharArrayUnion<V8> >());
#endif
EXPECT_EQ(sizeof(char), sizeof(AlignedCharArray<char>::union_type));
EXPECT_EQ(sizeof(char[1]), sizeof(AlignedCharArray<char[1]>::union_type));
EXPECT_EQ(sizeof(char[2]), sizeof(AlignedCharArray<char[2]>::union_type));
EXPECT_EQ(sizeof(char[3]), sizeof(AlignedCharArray<char[3]>::union_type));
EXPECT_EQ(sizeof(char[4]), sizeof(AlignedCharArray<char[4]>::union_type));
EXPECT_EQ(sizeof(char[5]), sizeof(AlignedCharArray<char[5]>::union_type));
EXPECT_EQ(sizeof(char[8]), sizeof(AlignedCharArray<char[8]>::union_type));
EXPECT_EQ(sizeof(char[13]), sizeof(AlignedCharArray<char[13]>::union_type));
EXPECT_EQ(sizeof(char[16]), sizeof(AlignedCharArray<char[16]>::union_type));
EXPECT_EQ(sizeof(char[21]), sizeof(AlignedCharArray<char[21]>::union_type));
EXPECT_EQ(sizeof(char[32]), sizeof(AlignedCharArray<char[32]>::union_type));
EXPECT_EQ(sizeof(short), sizeof(AlignedCharArray<short>::union_type));
EXPECT_EQ(sizeof(int), sizeof(AlignedCharArray<int>::union_type));
EXPECT_EQ(sizeof(long), sizeof(AlignedCharArray<long>::union_type));
EXPECT_EQ(sizeof(char), sizeof(AlignedCharArrayUnion<char>));
EXPECT_EQ(sizeof(char[1]), sizeof(AlignedCharArrayUnion<char[1]>));
EXPECT_EQ(sizeof(char[2]), sizeof(AlignedCharArrayUnion<char[2]>));
EXPECT_EQ(sizeof(char[3]), sizeof(AlignedCharArrayUnion<char[3]>));
EXPECT_EQ(sizeof(char[4]), sizeof(AlignedCharArrayUnion<char[4]>));
EXPECT_EQ(sizeof(char[5]), sizeof(AlignedCharArrayUnion<char[5]>));
EXPECT_EQ(sizeof(char[8]), sizeof(AlignedCharArrayUnion<char[8]>));
EXPECT_EQ(sizeof(char[13]), sizeof(AlignedCharArrayUnion<char[13]>));
EXPECT_EQ(sizeof(char[16]), sizeof(AlignedCharArrayUnion<char[16]>));
EXPECT_EQ(sizeof(char[21]), sizeof(AlignedCharArrayUnion<char[21]>));
EXPECT_EQ(sizeof(char[32]), sizeof(AlignedCharArrayUnion<char[32]>));
EXPECT_EQ(sizeof(short), sizeof(AlignedCharArrayUnion<short>));
EXPECT_EQ(sizeof(int), sizeof(AlignedCharArrayUnion<int>));
EXPECT_EQ(sizeof(long), sizeof(AlignedCharArrayUnion<long>));
EXPECT_EQ(sizeof(long long),
sizeof(AlignedCharArray<long long>::union_type));
EXPECT_EQ(sizeof(float), sizeof(AlignedCharArray<float>::union_type));
EXPECT_EQ(sizeof(double), sizeof(AlignedCharArray<double>::union_type));
sizeof(AlignedCharArrayUnion<long long>));
EXPECT_EQ(sizeof(float), sizeof(AlignedCharArrayUnion<float>));
EXPECT_EQ(sizeof(double), sizeof(AlignedCharArrayUnion<double>));
EXPECT_EQ(sizeof(long double),
sizeof(AlignedCharArray<long double>::union_type));
EXPECT_EQ(sizeof(void *), sizeof(AlignedCharArray<void *>::union_type));
EXPECT_EQ(sizeof(int *), sizeof(AlignedCharArray<int *>::union_type));
sizeof(AlignedCharArrayUnion<long double>));
EXPECT_EQ(sizeof(void *), sizeof(AlignedCharArrayUnion<void *>));
EXPECT_EQ(sizeof(int *), sizeof(AlignedCharArrayUnion<int *>));
EXPECT_EQ(sizeof(double (*)(double)),
sizeof(AlignedCharArray<double (*)(double)>::union_type));
sizeof(AlignedCharArrayUnion<double (*)(double)>));
EXPECT_EQ(sizeof(double (S6::*)()),
sizeof(AlignedCharArray<double (S6::*)()>::union_type));
EXPECT_EQ(sizeof(S1), sizeof(AlignedCharArray<S1>::union_type));
EXPECT_EQ(sizeof(S2), sizeof(AlignedCharArray<S2>::union_type));
EXPECT_EQ(sizeof(S3), sizeof(AlignedCharArray<S3>::union_type));
EXPECT_EQ(sizeof(S4), sizeof(AlignedCharArray<S4>::union_type));
EXPECT_EQ(sizeof(S5), sizeof(AlignedCharArray<S5>::union_type));
EXPECT_EQ(sizeof(S6), sizeof(AlignedCharArray<S6>::union_type));
EXPECT_EQ(sizeof(D1), sizeof(AlignedCharArray<D1>::union_type));
EXPECT_EQ(sizeof(D2), sizeof(AlignedCharArray<D2>::union_type));
EXPECT_EQ(sizeof(D3), sizeof(AlignedCharArray<D3>::union_type));
EXPECT_EQ(sizeof(D4), sizeof(AlignedCharArray<D4>::union_type));
EXPECT_EQ(sizeof(D5), sizeof(AlignedCharArray<D5>::union_type));
EXPECT_EQ(sizeof(D6), sizeof(AlignedCharArray<D6>::union_type));
EXPECT_EQ(sizeof(D7), sizeof(AlignedCharArray<D7>::union_type));
EXPECT_EQ(sizeof(D8), sizeof(AlignedCharArray<D8>::union_type));
EXPECT_EQ(sizeof(D9), sizeof(AlignedCharArray<D9>::union_type));
EXPECT_EQ(sizeof(D9[1]), sizeof(AlignedCharArray<D9[1]>::union_type));
EXPECT_EQ(sizeof(D9[2]), sizeof(AlignedCharArray<D9[2]>::union_type));
EXPECT_EQ(sizeof(D9[3]), sizeof(AlignedCharArray<D9[3]>::union_type));
EXPECT_EQ(sizeof(D9[4]), sizeof(AlignedCharArray<D9[4]>::union_type));
EXPECT_EQ(sizeof(D9[5]), sizeof(AlignedCharArray<D9[5]>::union_type));
EXPECT_EQ(sizeof(D9[8]), sizeof(AlignedCharArray<D9[8]>::union_type));
EXPECT_EQ(sizeof(D9[13]), sizeof(AlignedCharArray<D9[13]>::union_type));
EXPECT_EQ(sizeof(D9[16]), sizeof(AlignedCharArray<D9[16]>::union_type));
EXPECT_EQ(sizeof(D9[21]), sizeof(AlignedCharArray<D9[21]>::union_type));
EXPECT_EQ(sizeof(D9[32]), sizeof(AlignedCharArray<D9[32]>::union_type));
EXPECT_EQ(sizeof(V1), sizeof(AlignedCharArray<V1>::union_type));
EXPECT_EQ(sizeof(V2), sizeof(AlignedCharArray<V2>::union_type));
EXPECT_EQ(sizeof(V3), sizeof(AlignedCharArray<V3>::union_type));
EXPECT_EQ(sizeof(V4), sizeof(AlignedCharArray<V4>::union_type));
EXPECT_EQ(sizeof(V5), sizeof(AlignedCharArray<V5>::union_type));
EXPECT_EQ(sizeof(V6), sizeof(AlignedCharArray<V6>::union_type));
EXPECT_EQ(sizeof(V7), sizeof(AlignedCharArray<V7>::union_type));
sizeof(AlignedCharArrayUnion<double (S6::*)()>));
EXPECT_EQ(sizeof(S1), sizeof(AlignedCharArrayUnion<S1>));
EXPECT_EQ(sizeof(S2), sizeof(AlignedCharArrayUnion<S2>));
EXPECT_EQ(sizeof(S3), sizeof(AlignedCharArrayUnion<S3>));
EXPECT_EQ(sizeof(S4), sizeof(AlignedCharArrayUnion<S4>));
EXPECT_EQ(sizeof(S5), sizeof(AlignedCharArrayUnion<S5>));
EXPECT_EQ(sizeof(S6), sizeof(AlignedCharArrayUnion<S6>));
EXPECT_EQ(sizeof(D1), sizeof(AlignedCharArrayUnion<D1>));
EXPECT_EQ(sizeof(D2), sizeof(AlignedCharArrayUnion<D2>));
EXPECT_EQ(sizeof(D3), sizeof(AlignedCharArrayUnion<D3>));
EXPECT_EQ(sizeof(D4), sizeof(AlignedCharArrayUnion<D4>));
EXPECT_EQ(sizeof(D5), sizeof(AlignedCharArrayUnion<D5>));
EXPECT_EQ(sizeof(D6), sizeof(AlignedCharArrayUnion<D6>));
EXPECT_EQ(sizeof(D7), sizeof(AlignedCharArrayUnion<D7>));
EXPECT_EQ(sizeof(D8), sizeof(AlignedCharArrayUnion<D8>));
EXPECT_EQ(sizeof(D9), sizeof(AlignedCharArrayUnion<D9>));
EXPECT_EQ(sizeof(D9[1]), sizeof(AlignedCharArrayUnion<D9[1]>));
EXPECT_EQ(sizeof(D9[2]), sizeof(AlignedCharArrayUnion<D9[2]>));
EXPECT_EQ(sizeof(D9[3]), sizeof(AlignedCharArrayUnion<D9[3]>));
EXPECT_EQ(sizeof(D9[4]), sizeof(AlignedCharArrayUnion<D9[4]>));
EXPECT_EQ(sizeof(D9[5]), sizeof(AlignedCharArrayUnion<D9[5]>));
EXPECT_EQ(sizeof(D9[8]), sizeof(AlignedCharArrayUnion<D9[8]>));
EXPECT_EQ(sizeof(D9[13]), sizeof(AlignedCharArrayUnion<D9[13]>));
EXPECT_EQ(sizeof(D9[16]), sizeof(AlignedCharArrayUnion<D9[16]>));
EXPECT_EQ(sizeof(D9[21]), sizeof(AlignedCharArrayUnion<D9[21]>));
EXPECT_EQ(sizeof(D9[32]), sizeof(AlignedCharArrayUnion<D9[32]>));
EXPECT_EQ(sizeof(V1), sizeof(AlignedCharArrayUnion<V1>));
EXPECT_EQ(sizeof(V2), sizeof(AlignedCharArrayUnion<V2>));
EXPECT_EQ(sizeof(V3), sizeof(AlignedCharArrayUnion<V3>));
EXPECT_EQ(sizeof(V4), sizeof(AlignedCharArrayUnion<V4>));
EXPECT_EQ(sizeof(V5), sizeof(AlignedCharArrayUnion<V5>));
EXPECT_EQ(sizeof(V6), sizeof(AlignedCharArrayUnion<V6>));
EXPECT_EQ(sizeof(V7), sizeof(AlignedCharArrayUnion<V7>));
// Some versions of MSVC also get this wrong. The failure again appears to be
// benign: sizeof(V8) is only 52 bytes, but our array reserves 56.
#ifndef _MSC_VER
EXPECT_EQ(sizeof(V8), sizeof(AlignedCharArray<V8>::union_type));
EXPECT_EQ(sizeof(V8), sizeof(AlignedCharArrayUnion<V8>));
#endif
}

View File

@ -340,44 +340,51 @@ TEST_F(FileSystemTest, Permissions) {
}
#endif
#if !defined(_WIN32) // FIXME: temporary suppressed.
TEST_F(FileSystemTest, FileMapping) {
// Create a temp file.
int FileDescriptor;
SmallString<64> TempPath;
ASSERT_NO_ERROR(
fs::unique_file("%%-%%-%%-%%.temp", FileDescriptor, TempPath));
// Grow temp file to be 4096 bytes
ASSERT_NO_ERROR(sys::fs::resize_file(Twine(TempPath), 4096));
// Map in temp file and add some content
void* MappedMemory;
ASSERT_NO_ERROR(fs::map_file_pages(Twine(TempPath), 0, 4096,
true /*writable*/, MappedMemory));
char* Memory = reinterpret_cast<char*>(MappedMemory);
strcpy(Memory, "hello there");
// Unmap temp file
ASSERT_NO_ERROR(fs::unmap_file_pages(MappedMemory, 4096));
MappedMemory = NULL;
Memory = NULL;
error_code EC;
StringRef Val("hello there");
{
fs::mapped_file_region mfr(FileDescriptor,
fs::mapped_file_region::readwrite,
4096,
0,
EC);
ASSERT_NO_ERROR(EC);
std::copy(Val.begin(), Val.end(), mfr.data());
// Explicitly add a 0.
mfr.data()[Val.size()] = 0;
// Unmap temp file
}
// Map it back in read-only
ASSERT_NO_ERROR(fs::map_file_pages(Twine(TempPath), 0, 4096,
false /*read-only*/, MappedMemory));
fs::mapped_file_region mfr(Twine(TempPath),
fs::mapped_file_region::readonly,
0,
0,
EC);
ASSERT_NO_ERROR(EC);
// Verify content
Memory = reinterpret_cast<char*>(MappedMemory);
bool SAME = (strcmp(Memory, "hello there") == 0);
EXPECT_TRUE(SAME);
EXPECT_EQ(StringRef(mfr.const_data()), Val);
// Unmap temp file
ASSERT_NO_ERROR(fs::unmap_file_pages(MappedMemory, 4096));
MappedMemory = NULL;
Memory = NULL;
}
#if LLVM_USE_RVALUE_REFERENCES
fs::mapped_file_region m(Twine(TempPath),
fs::mapped_file_region::readonly,
0,
0,
EC);
ASSERT_NO_ERROR(EC);
const char *Data = m.const_data();
fs::mapped_file_region mfrrv(llvm_move(m));
EXPECT_EQ(mfrrv.const_data(), Data);
#endif
}
} // anonymous namespace

View File

@ -297,6 +297,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) : TheDef(R), Operands(R) {
isCompare = R->getValueAsBit("isCompare");
isMoveImm = R->getValueAsBit("isMoveImm");
isBitcast = R->getValueAsBit("isBitcast");
isSelect = R->getValueAsBit("isSelect");
isBarrier = R->getValueAsBit("isBarrier");
isCall = R->getValueAsBit("isCall");
canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");

View File

@ -222,6 +222,7 @@ namespace llvm {
bool isCompare;
bool isMoveImm;
bool isBitcast;
bool isSelect;
bool isBarrier;
bool isCall;
bool canFoldAsLoad;

View File

@ -28,19 +28,15 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
CodeGenSubRegIndex::CodeGenSubRegIndex(Record *R, unsigned Enum)
: TheDef(R),
EnumValue(Enum)
{}
std::string CodeGenSubRegIndex::getNamespace() const {
if (TheDef->getValue("Namespace"))
return TheDef->getValueAsString("Namespace");
else
return "";
: TheDef(R), EnumValue(Enum) {
Name = R->getName();
if (R->getValue("Namespace"))
Namespace = R->getValueAsString("Namespace");
}
const std::string &CodeGenSubRegIndex::getName() const {
return TheDef->getName();
CodeGenSubRegIndex::CodeGenSubRegIndex(StringRef N, StringRef Nspace,
unsigned Enum)
: TheDef(0), Name(N), Namespace(Nspace), EnumValue(Enum) {
}
std::string CodeGenSubRegIndex::getQualifiedName() const {
@ -52,16 +48,31 @@ std::string CodeGenSubRegIndex::getQualifiedName() const {
}
void CodeGenSubRegIndex::updateComponents(CodeGenRegBank &RegBank) {
std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf");
if (Comps.empty())
if (!TheDef)
return;
if (Comps.size() != 2)
throw TGError(TheDef->getLoc(), "ComposedOf must have exactly two entries");
CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]);
CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]);
CodeGenSubRegIndex *X = A->addComposite(B, this);
if (X)
throw TGError(TheDef->getLoc(), "Ambiguous ComposedOf entries");
std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf");
if (!Comps.empty()) {
if (Comps.size() != 2)
throw TGError(TheDef->getLoc(), "ComposedOf must have exactly two entries");
CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]);
CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]);
CodeGenSubRegIndex *X = A->addComposite(B, this);
if (X)
throw TGError(TheDef->getLoc(), "Ambiguous ComposedOf entries");
}
std::vector<Record*> Parts =
TheDef->getValueAsListOfDefs("CoveringSubRegIndices");
if (!Parts.empty()) {
if (Parts.size() < 2)
throw TGError(TheDef->getLoc(),
"CoveredBySubRegs must have two or more entries");
SmallVector<CodeGenSubRegIndex*, 8> IdxParts;
for (unsigned i = 0, e = Parts.size(); i != e; ++i)
IdxParts.push_back(RegBank.getSubRegIdx(Parts[i]));
RegBank.addConcatSubRegIndex(IdxParts, this);
}
}
void CodeGenSubRegIndex::cleanComposites() {
@ -937,7 +948,7 @@ void CodeGenRegisterClass::buildRegUnitSet(
// CodeGenRegBank
//===----------------------------------------------------------------------===//
CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) {
// Configure register Sets to understand register classes and tuples.
Sets.addFieldExpander("RegisterClass", "MemberList");
Sets.addFieldExpander("CalleeSavedRegs", "SaveList");
@ -947,7 +958,6 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
// More indices will be synthesized later.
std::vector<Record*> SRIs = Records.getAllDerivedDefinitions("SubRegIndex");
std::sort(SRIs.begin(), SRIs.end(), LessRecord());
NumNamedIndices = SRIs.size();
for (unsigned i = 0, e = SRIs.size(); i != e; ++i)
getSubRegIdx(SRIs[i]);
// Build composite maps from ComposedOf fields.
@ -1015,6 +1025,15 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
CodeGenRegisterClass::computeSubClasses(*this);
}
// Create a synthetic CodeGenSubRegIndex without a corresponding Record.
CodeGenSubRegIndex*
CodeGenRegBank::createSubRegIndex(StringRef Name, StringRef Namespace) {
CodeGenSubRegIndex *Idx = new CodeGenSubRegIndex(Name, Namespace,
SubRegIndices.size() + 1);
SubRegIndices.push_back(Idx);
return Idx;
}
CodeGenSubRegIndex *CodeGenRegBank::getSubRegIdx(Record *Def) {
CodeGenSubRegIndex *&Idx = Def2SubRegIdx[Def];
if (Idx)
@ -1079,7 +1098,7 @@ CodeGenRegBank::getCompositeSubRegIndex(CodeGenSubRegIndex *A,
// None exists, synthesize one.
std::string Name = A->getName() + "_then_" + B->getName();
Comp = getSubRegIdx(new Record(Name, SMLoc(), Records));
Comp = createSubRegIndex(Name, A->getNamespace());
A->addComposite(B, Comp);
return Comp;
}
@ -1099,7 +1118,7 @@ getConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex*, 8> &Parts) {
Name += '_';
Name += Parts[i]->getName();
}
return Idx = getSubRegIdx(new Record(Name, SMLoc(), Records));
return Idx = createSubRegIndex(Name, Parts.front()->getNamespace());
}
void CodeGenRegBank::computeComposites() {

View File

@ -35,14 +35,17 @@ namespace llvm {
/// CodeGenSubRegIndex - Represents a sub-register index.
class CodeGenSubRegIndex {
Record *const TheDef;
std::string Name;
std::string Namespace;
public:
const unsigned EnumValue;
CodeGenSubRegIndex(Record *R, unsigned Enum);
CodeGenSubRegIndex(StringRef N, StringRef Nspace, unsigned Enum);
const std::string &getName() const;
std::string getNamespace() const;
const std::string &getName() const { return Name; }
const std::string &getNamespace() const { return Namespace; }
std::string getQualifiedName() const;
// Order CodeGenSubRegIndex pointers by EnumValue.
@ -422,13 +425,13 @@ namespace llvm {
// CodeGenRegBank - Represent a target's registers and the relations between
// them.
class CodeGenRegBank {
RecordKeeper &Records;
SetTheory Sets;
// SubRegIndices.
std::vector<CodeGenSubRegIndex*> SubRegIndices;
DenseMap<Record*, CodeGenSubRegIndex*> Def2SubRegIdx;
unsigned NumNamedIndices;
CodeGenSubRegIndex *createSubRegIndex(StringRef Name, StringRef NameSpace);
typedef std::map<SmallVector<CodeGenSubRegIndex*, 8>,
CodeGenSubRegIndex*> ConcatIdxMap;
@ -495,7 +498,6 @@ namespace llvm {
// in the .td files. The rest are synthesized such that all sub-registers
// have a unique name.
ArrayRef<CodeGenSubRegIndex*> getSubRegIndices() { return SubRegIndices; }
unsigned getNumNamedIndices() { return NumNamedIndices; }
// Find a SubRegIndex form its Record def.
CodeGenSubRegIndex *getSubRegIdx(Record*);

File diff suppressed because it is too large Load Diff

View File

@ -319,6 +319,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
if (Inst.isCompare) OS << "|(1<<MCID::Compare)";
if (Inst.isMoveImm) OS << "|(1<<MCID::MoveImm)";
if (Inst.isBitcast) OS << "|(1<<MCID::Bitcast)";
if (Inst.isSelect) OS << "|(1<<MCID::Select)";
if (Inst.isBarrier) OS << "|(1<<MCID::Barrier)";
if (Inst.hasDelaySlot) OS << "|(1<<MCID::DelaySlot)";
if (Inst.isCall) OS << "|(1<<MCID::Call)";

View File

@ -145,9 +145,9 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS,
if (!Namespace.empty())
OS << "namespace " << Namespace << " {\n";
OS << "enum {\n NoSubRegister,\n";
for (unsigned i = 0, e = Bank.getNumNamedIndices(); i != e; ++i)
for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i)
OS << " " << SubRegIndices[i]->getName() << ",\t// " << i+1 << "\n";
OS << " NUM_TARGET_NAMED_SUBREGS\n};\n";
OS << " NUM_TARGET_SUBREGS\n};\n";
if (!Namespace.empty())
OS << "}\n";
}
@ -885,17 +885,6 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
}
OS << "\" };\n\n";
// Emit names of the anonymous subreg indices.
unsigned NamedIndices = RegBank.getNumNamedIndices();
if (SubRegIndices.size() > NamedIndices) {
OS << " enum {";
for (unsigned i = NamedIndices, e = SubRegIndices.size(); i != e; ++i) {
OS << "\n " << SubRegIndices[i]->getName() << " = " << i+1;
if (i+1 != e)
OS << ',';
}
OS << "\n };\n\n";
}
OS << "\n";
// Now that all of the structs have been emitted, emit the instances.

View File

@ -566,6 +566,9 @@ def main(builtinParameters = {}): # Bump the GIL check interval, its more imp
if opts.maxTests is not None:
tests = tests[:opts.maxTests]
# Don't create more threads than tests.
opts.numThreads = min(len(tests), opts.numThreads)
extra = ''
if len(tests) != numTotalTests:
extra = ' of %d' % numTotalTests
@ -589,9 +592,6 @@ def main(builtinParameters = {}): # Bump the GIL check interval, its more imp
else:
print header
# Don't create more threads than tests.
opts.numThreads = min(len(tests), opts.numThreads)
startTime = time.time()
display = TestingProgressDisplay(opts, len(tests), progressBar)
provider = TestProvider(tests, opts.maxTime)