Vendor import of llvm RELEASE_351/final tag r225668 (effectively, 3.5.1 release):

https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_351/final@225668
2015-01-15 22:30:16 +00:00 · 2015-01-15 22:30:16 +00:00 · 9f61947910
commit 9f61947910
parent 5ca98fd987
125 changed files with 5242 additions and 1519 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -27,7 +27,7 @@ set(CMAKE_MODULE_PATH

 set(LLVM_VERSION_MAJOR 3)
 set(LLVM_VERSION_MINOR 5)
-set(LLVM_VERSION_PATCH 0)
+set(LLVM_VERSION_PATCH 1)

 if (NOT PACKAGE_VERSION)
  set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}svn")
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@ -32,11 +32,11 @@ dnl===-----------------------------------------------------------------------===
 dnl Initialize autoconf and define the package name, version number and
 dnl address for reporting bugs.

-AC_INIT([LLVM],[3.5.0],[http://llvm.org/bugs/])
+AC_INIT([LLVM],[3.5.1],[http://llvm.org/bugs/])

 LLVM_VERSION_MAJOR=3
 LLVM_VERSION_MINOR=5
-LLVM_VERSION_PATCH=0
+LLVM_VERSION_PATCH=1
 LLVM_VERSION_SUFFIX=

 AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API])
--- a/cmake/modules/Makefile
+++ b/cmake/modules/Makefile
@ -33,6 +33,19 @@ else
 	LLVM_ENABLE_RTTI := 0
 endif

+# Skip llvm-config when the only goal is `clean`: the tool is not built yet.
+ifneq ($(MAKECMDGOALS),clean)
+LLVM_LIBS_TO_EXPORT := $(subst -l,,$(shell $(LLVM_CONFIG) --libs $(LINK_COMPONENTS) || echo Error))
+# The `|| echo Error` sentinel may follow partial output, so match it as a word.
+ifneq ($(filter Error,$(LLVM_LIBS_TO_EXPORT)),)
+$(error llvm-config --libs failed)
+endif
+# An empty list would generate the CMake files below with no libraries at all.
+ifndef LLVM_LIBS_TO_EXPORT
+$(error LLVM_LIBS_TO_EXPORT cannot be empty)
+endif
+endif
+
 OBJMODS := LLVMConfig.cmake LLVMConfigVersion.cmake LLVMExports.cmake

 $(PROJ_OBJ_DIR)/LLVMConfig.cmake: LLVMConfig.cmake.in $(LLVMBuildCMakeFrag)
@ -45,7 +58,7 @@ $(PROJ_OBJ_DIR)/LLVMConfig.cmake: LLVMConfig.cmake.in $(LLVMBuildCMakeFrag)
 	  -e 's/@LLVM_VERSION_PATCH@/'"$(LLVM_VERSION_PATCH)"'/' \
 	  -e 's/@PACKAGE_VERSION@/'"$(LLVMVersion)"'/' \
 	  -e 's/@LLVM_COMMON_DEPENDS@//' \
-	  -e 's/@LLVM_AVAILABLE_LIBS@/'"$(subst -l,,$(LLVMConfigLibs))"'/' \
+	  -e 's/@LLVM_AVAILABLE_LIBS@/'"$(LLVM_LIBS_TO_EXPORT)"'/' \
 	  -e 's/@LLVM_ALL_TARGETS@/'"$(ALL_TARGETS)"'/' \
 	  -e 's/@LLVM_TARGETS_TO_BUILD@/'"$(TARGETS_TO_BUILD)"'/' \
 	  -e 's/@LLVM_TARGETS_WITH_JIT@/'"$(TARGETS_WITH_JIT)"'/' \
@ -83,7 +96,7 @@ $(PROJ_OBJ_DIR)/LLVMExports.cmake: $(LLVMBuildCMakeExportsFrag)
 	$(Echo) 'Generating LLVM CMake target exports file'
 	$(Verb) ( \
 	  echo '# LLVM CMake target exports.  Do not include directly.' && \
-	  for lib in $(subst -l,,$(LLVMConfigLibs)); do \
+	  for lib in $(LLVM_LIBS_TO_EXPORT); do \
 	    echo 'add_library('"$$lib"' STATIC IMPORTED)' && \
 	    echo 'set_property(TARGET '"$$lib"' PROPERTY IMPORTED_LOCATION "'"$(PROJ_libdir)/lib$$lib.a"'")' ; \
 	  done && \
--- a/configure
+++ b/configure
@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for LLVM 3.5.0.
+# Generated by GNU Autoconf 2.60 for LLVM 3.5.1.
 #
 # Report bugs to <http://llvm.org/bugs/>.
 #
@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='LLVM'
 PACKAGE_TARNAME='llvm'
-PACKAGE_VERSION='3.5.0'
-PACKAGE_STRING='LLVM 3.5.0'
+PACKAGE_VERSION='3.5.1'
+PACKAGE_STRING='LLVM 3.5.1'
 PACKAGE_BUGREPORT='http://llvm.org/bugs/'

 ac_unique_file="lib/IR/Module.cpp"
@ -1316,7 +1316,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures LLVM 3.5.0 to adapt to many kinds of systems.
+\`configure' configures LLVM 3.5.1 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@ -1382,7 +1382,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of LLVM 3.5.0:";;
+     short | recursive ) echo "Configuration of LLVM 3.5.1:";;
   esac
  cat <<\_ACEOF

@ -1553,7 +1553,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-LLVM configure 3.5.0
+LLVM configure 3.5.1
 generated by GNU Autoconf 2.60

 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@ -1569,7 +1569,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by LLVM $as_me 3.5.0, which was
+It was created by LLVM $as_me 3.5.1, which was
 generated by GNU Autoconf 2.60.  Invocation command line was

  $ $0 $@
@ -1925,7 +1925,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu

 LLVM_VERSION_MAJOR=3
 LLVM_VERSION_MINOR=5
-LLVM_VERSION_PATCH=0
+LLVM_VERSION_PATCH=1
 LLVM_VERSION_SUFFIX=


@ -19245,7 +19245,7 @@ exec 6>&1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by LLVM $as_me 3.5.0, which was
+This file was extended by LLVM $as_me 3.5.1, which was
 generated by GNU Autoconf 2.60.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@ -19298,7 +19298,7 @@ Report bugs to <bug-autoconf@gnu.org>."
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-LLVM config.status 3.5.0
+LLVM config.status 3.5.1
 configured by $0, generated by GNU Autoconf 2.60,
  with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"

--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@ -24,6 +24,43 @@ them.
 Non-comprehensive list of changes in this release
 =================================================

+Changes to the MIPS Target
+--------------------------
+
+* A large number of bugs have been fixed for big-endian Mips targets using the
+  N32 and N64 ABIs. Please note that some of these bugs will still affect
+  LLVM-IR generated by LLVM 3.5 since correct code generation depends on
+  appropriate usage of the ``inreg``, ``signext``, and ``zeroext`` attributes
+  on all function arguments and returns.
+
+* The registers used to return a structure containing a single 128-bit floating
+  point member on the N32/N64 ABIs have been changed from those specified by
+  the ABI documentation to match those used by GCC. The documentation specifies
+  that ``$f0`` and ``$f2`` should be used but GCC has used ``$f0`` and ``$f1``
+  for many years.
+
+* Returning a zero-byte struct no longer causes incorrect code generation when
+  using the O32 ABI.
+
+* Passing structures smaller than 32 bits using the O32 ABI on a big-endian
+  target has been fixed.
+
+* The exception personality has been changed for 64-bit Mips targets to
+  eliminate warnings about relocations in a read-only section.
+
+* Incorrect usage of odd-numbered single-precision floating point registers
+  has been fixed when the fastcc calling convention is used with 64-bit FPUs
+  and -mno-odd-spreg.
+
+* For inline assembly, the 'z' print modifier can now be used on
+  non-immediate values.
+
+* Attempting to disassemble l[wd]c[23], s[wd]c[23], cache, and pref no longer
+  triggers an assertion.
+
+Non-comprehensive list of changes in 3.5
+========================================
+
 * All backends have been changed to use the MC asm printer and support for the
  non MC one has been removed.

@ -217,6 +254,37 @@ We had also decided that the name of the combined backend should be AArch64,
 following ARM's official documentation. So, at the end of May the old
 AArch64 directory was removed, and ARM64 renamed into its place.

+Changes to the PowerPC Target
+-----------------------------
+
+The PowerPC 64-bit Little Endian subtarget (powerpc64le-unknown-linux-gnu) is
+now fully supported.  This includes support for the Altivec instruction set.
+
+The Power Architecture 64-Bit ELFv2 ABI Specification is now supported, and
+is the default ABI for Little Endian.  The ELFv1 ABI remains the default ABI
+for Big Endian.  Currently, it is not possible to override these defaults.
+That capability will be available (albeit not recommended) in a future release.
+
+Links to the ELFv2 ABI specification and to the Power ISA Version 2.07
+specification may be found `here <https://www-03.ibm.com/technologyconnect/tgcm/TGCMServlet.wss?alias=OpenPOWER&linkid=1n0000>`_ (free registration required).
+Efforts are underway to move this to a location that doesn't require
+registration, but the planned site isn't ready yet.
+
+Experimental support for the VSX instruction set introduced with ISA 2.06
+is now available using the ``-mvsx`` switch.  Work remains on this, so it
+is not recommended for production use.  VSX is disabled for Little Endian
+regardless of this switch setting.
+
+Load/store cost estimates have been improved.
+
+Constant hoisting has been enabled.
+
+Global named register support has been enabled.
+
+Initial support for PIC code has been added for the 32-bit ELF subtarget.
+Further support will be available in a future release.
+
+
 Changes to CMake build system
 -----------------------------

--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@ -253,13 +253,16 @@ private:
                  const MDNode *TBAAInfo,
                  bool KnownMustAlias = false);
  void addUnknownInst(Instruction *I, AliasAnalysis &AA);
-  void removeUnknownInst(Instruction *I) {
+  void removeUnknownInst(AliasSetTracker &AST, Instruction *I) {
+    bool WasEmpty = UnknownInsts.empty();
    for (size_t i = 0, e = UnknownInsts.size(); i != e; ++i)
      if (UnknownInsts[i] == I) {
        UnknownInsts[i] = UnknownInsts.back();
        UnknownInsts.pop_back();
        --i; --e;  // Revisit the moved entry.
      }
+    if (!WasEmpty && UnknownInsts.empty())
+      dropRef(AST);
  }
  void setVolatile() { Volatile = true; }

--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@ -31,18 +31,25 @@ class TargetRegisterInfo;
 class CCValAssign {
 public:
  enum LocInfo {
-    Full,   // The value fills the full location.
-    SExt,   // The value is sign extended in the location.
-    ZExt,   // The value is zero extended in the location.
-    AExt,   // The value is extended with undefined upper bits.
-    BCvt,   // The value is bit-converted in the location.
-    VExt,   // The value is vector-widened in the location.
-            // FIXME: Not implemented yet. Code that uses AExt to mean
-            // vector-widen should be fixed to use VExt instead.
-    FPExt,  // The floating-point value is fp-extended in the location.
-    Indirect // The location contains pointer to the value.
+    Full,      // The value fills the full location.
+    SExt,      // The value is sign extended in the location.
+    ZExt,      // The value is zero extended in the location.
+    AExt,      // The value is extended with undefined upper bits.
+    BCvt,      // The value is bit-converted in the location.
+    VExt,      // The value is vector-widened in the location.
+               // FIXME: Not implemented yet. Code that uses AExt to mean
+               // vector-widen should be fixed to use VExt instead.
+    FPExt,     // The floating-point value is fp-extended in the location.
+    Indirect,  // The location contains pointer to the value.
+    SExtUpper, // The value is in the upper bits of the location and should be
+               // sign extended when retrieved.
+    ZExtUpper, // The value is in the upper bits of the location and should be
+               // zero extended when retrieved.
+    AExtUpper  // The value is in the upper bits of the location and should be
+               // extended with undefined upper bits when retrieved.
    // TODO: a subset of the value is in the location.
  };
+
 private:
  /// ValNo - This is the value number begin assigned (e.g. an argument number).
  unsigned ValNo;
@ -146,6 +153,9 @@ public:
    return (HTP == AExt || HTP == SExt || HTP == ZExt);
  }

+  bool isUpperBitsInLoc() const {
+    return HTP == AExtUpper || HTP == SExtUpper || HTP == ZExtUpper;
+  }
 };

 /// CCAssignFn - This function assigns a location for Val, updating State to
@ -208,10 +218,10 @@ private:
  // while "%t" goes to the stack: it wouldn't be described in ByValRegs.
  //
  // Supposed use-case for this collection:
-  // 1. Initially ByValRegs is empty, InRegsParamsProceed is 0.
+  // 1. Initially ByValRegs is empty, InRegsParamsProcessed is 0.
  // 2. HandleByVal fillups ByValRegs.
  // 3. Argument analysis (LowerFormatArguments, for example). After
-  // some byval argument was analyzed, InRegsParamsProceed is increased.
+  // some byval argument was analyzed, InRegsParamsProcessed is increased.
  struct ByValInfo {
    ByValInfo(unsigned B, unsigned E, bool IsWaste = false) :
      Begin(B), End(E), Waste(IsWaste) {}
@ -229,9 +239,9 @@ private:
  };
  SmallVector<ByValInfo, 4 > ByValRegs;

-  // InRegsParamsProceed - shows how many instances of ByValRegs was proceed
+  // InRegsParamsProcessed - how many instances of ByValRegs were processed
  // during argument analysis.
-  unsigned InRegsParamsProceed;
+  unsigned InRegsParamsProcessed;

 protected:
  ParmContext CallOrPrologue;
@ -412,7 +422,7 @@ public:
  unsigned getInRegsParamsCount() const { return ByValRegs.size(); }

  // Returns count of byval in-regs arguments proceed.
-  unsigned getInRegsParamsProceed() const { return InRegsParamsProceed; }
+  unsigned getInRegsParamsProcessed() const { return InRegsParamsProcessed; }

  // Get information about N-th byval parameter that is stored in registers.
  // Here "ByValParamIndex" is N.
@ -436,20 +446,20 @@ public:
  // Returns false, if end is reached.
  bool nextInRegsParam() {
    unsigned e = ByValRegs.size();
-    if (InRegsParamsProceed < e)
-      ++InRegsParamsProceed;
-    return InRegsParamsProceed < e;
+    if (InRegsParamsProcessed < e)
+      ++InRegsParamsProcessed;
+    return InRegsParamsProcessed < e;
  }

  // Clear byval registers tracking info.
  void clearByValRegsInfo() {
-    InRegsParamsProceed = 0;
+    InRegsParamsProcessed = 0;
    ByValRegs.clear();
  }

  // Rewind byval registers tracking info.
  void rewindByValRegsInfo() {
-    InRegsParamsProceed = 0;
+    InRegsParamsProcessed = 0;
  }

  ParmContext getCallOrPrologue() const { return CallOrPrologue; }
--- a/include/llvm/Target/TargetCallingConv.td
+++ b/include/llvm/Target/TargetCallingConv.td
@ -67,6 +67,9 @@ class CCIfSplit<CCAction A> : CCIf<"ArgFlags.isSplit()", A> {}
 /// the specified action.
 class CCIfSRet<CCAction A> : CCIf<"ArgFlags.isSRet()", A> {}

+/// CCIfVarArg - If the current function is vararg - apply the action
+class CCIfVarArg<CCAction A> : CCIf<"State.isVarArg()", A> {}
+
 /// CCIfNotVarArg - If the current function is not vararg - apply the action
 class CCIfNotVarArg<CCAction A> : CCIf<"!State.isVarArg()", A> {}

@ -119,6 +122,12 @@ class CCPromoteToType<ValueType destTy> : CCAction {
  ValueType DestTy = destTy;
 }

+/// CCPromoteToUpperBitsInType - If applied, this promotes the specified current
+/// value to the specified type and shifts the value into the upper bits.
+class CCPromoteToUpperBitsInType<ValueType destTy> : CCAction {
+  ValueType DestTy = destTy;
+}
+
 /// CCBitConvertToType - If applied, this bitconverts the specified current
 /// value to the specified type.
 class CCBitConvertToType<ValueType destTy> : CCAction {
@ -141,6 +150,13 @@ class CCDelegateTo<CallingConv cc> : CCAction {
 /// that the target supports.
 class CallingConv<list<CCAction> actions> {
  list<CCAction> Actions = actions;
+  bit Custom = 0;
+}
+
+/// CustomCallingConv - An instance of this is used to declare calling
+/// conventions that are implemented using a custom function of the same name.
+class CustomCallingConv : CallingConv<[]> {
+  let Custom = 1;
 }

 /// CalleeSavedRegs - A list of callee saved registers for a given calling
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@ -55,10 +55,13 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
      AliasTy = MayAlias;
  }

+  bool ASHadUnknownInsts = !AS.UnknownInsts.empty();
  if (UnknownInsts.empty()) {            // Merge call sites...
-    if (!AS.UnknownInsts.empty())
+    if (ASHadUnknownInsts) {
      std::swap(UnknownInsts, AS.UnknownInsts);
-  } else if (!AS.UnknownInsts.empty()) {
+      addRef();
+    }
+  } else if (ASHadUnknownInsts) {
    UnknownInsts.insert(UnknownInsts.end(), AS.UnknownInsts.begin(), AS.UnknownInsts.end());
    AS.UnknownInsts.clear();
  }
@ -76,6 +79,8 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
    AS.PtrListEnd = &AS.PtrList;
    assert(*AS.PtrListEnd == nullptr && "End of list is not null?");
  }
+  if (ASHadUnknownInsts)
+    AS.dropRef(AST);
 }

 void AliasSetTracker::removeAliasSet(AliasSet *AS) {
@ -123,6 +128,8 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
 }

 void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
+  if (UnknownInsts.empty())
+    addRef();
  UnknownInsts.push_back(I);

  if (!I->mayWriteToMemory()) {
@ -218,13 +225,14 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
                                                  uint64_t Size,
                                                  const MDNode *TBAAInfo) {
  AliasSet *FoundSet = nullptr;
-  for (iterator I = begin(), E = end(); I != E; ++I) {
-    if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
+  for (iterator I = begin(), E = end(); I != E;) {
+    iterator Cur = I++;
+    if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
    
    if (!FoundSet) {      // If this is the first alias set ptr can go into.
-      FoundSet = I;       // Remember it.
+      FoundSet = Cur;     // Remember it.
    } else {              // Otherwise, we must merge the sets.
-      FoundSet->mergeSetIn(*I, *this);     // Merge in contents.
+      FoundSet->mergeSetIn(*Cur, *this);     // Merge in contents.
    }
  }

@ -246,14 +254,14 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,

 AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
  AliasSet *FoundSet = nullptr;
-  for (iterator I = begin(), E = end(); I != E; ++I) {
-    if (I->Forward || !I->aliasesUnknownInst(Inst, AA))
+  for (iterator I = begin(), E = end(); I != E;) {
+    iterator Cur = I++;
+    if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA))
      continue;
-    
    if (!FoundSet)            // If this is the first alias set ptr can go into.
-      FoundSet = I;           // Remember it.
-    else if (!I->Forward)     // Otherwise, we must merge the sets.
-      FoundSet->mergeSetIn(*I, *this);     // Merge in contents.
+      FoundSet = Cur;         // Remember it.
+    else if (!Cur->Forward)   // Otherwise, we must merge the sets.
+      FoundSet->mergeSetIn(*Cur, *this);     // Merge in contents.
  }
  return FoundSet;
 }
@ -393,6 +401,8 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
 /// tracker.
 void AliasSetTracker::remove(AliasSet &AS) {
  // Drop all call sites.
+  if (!AS.UnknownInsts.empty())
+    AS.dropRef(*this);
  AS.UnknownInsts.clear();
  
  // Clear the alias set.
@ -489,10 +499,10 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
  if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
    if (Inst->mayReadOrWriteMemory()) {
      // Scan all the alias sets to see if this call site is contained.
-      for (iterator I = begin(), E = end(); I != E; ++I) {
-        if (I->Forward) continue;
-        
-        I->removeUnknownInst(Inst);
+      for (iterator I = begin(), E = end(); I != E;) {
+        iterator Cur = I++;
+        if (!Cur->Forward)
+          Cur->removeUnknownInst(*this, Inst);
      }
    }
  }
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@ -14,7 +14,7 @@
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/Support/raw_ostream.h"
-#include <deque>
+#include <numeric>

 using namespace llvm;
 using namespace llvm::bfi_detail;
@ -123,8 +123,12 @@ static void combineWeight(Weight &W, const Weight &OtherW) {
  }
  assert(W.Type == OtherW.Type);
  assert(W.TargetNode == OtherW.TargetNode);
-  assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
-  W.Amount += OtherW.Amount;
+  assert(OtherW.Amount && "Expected non-zero weight");
+  if (W.Amount > W.Amount + OtherW.Amount)
+    // Saturate on overflow.
+    W.Amount = UINT64_MAX;
+  else
+    W.Amount += OtherW.Amount;
 }
 static void combineWeightsBySorting(WeightList &Weights) {
  // Sort so edges to the same node are adjacent.
@ -207,11 +211,19 @@ void Distribution::normalize() {
    Shift = 33 - countLeadingZeros(Total);

  // Early exit if nothing needs to be scaled.
-  if (!Shift)
+  if (!Shift) {
+    // If we didn't overflow then combineWeights() shouldn't have changed the
+    // sum of the weights, but let's double-check.
+    assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
+                                    [](uint64_t Sum, const Weight &W) {
+                      return Sum + W.Amount;
+                    }) &&
+           "Expected total to be correct");
    return;
+  }

  // Recompute the total through accumulation (rather than shifting it) so that
-  // it's accurate after shifting.
+  // it's accurate after shifting and any changes combineWeights() made above.
  Total = 0;

  // Sum the weights to each node and shift right if necessary.
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@ -1987,23 +1987,31 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
  default:
    return true;
  case Instruction::UDiv:
-  case Instruction::URem:
-    // x / y is undefined if y == 0, but calculations like x / 3 are safe.
-    return isKnownNonZero(Inst->getOperand(1), TD);
+  case Instruction::URem: {
+    // x / y is undefined if y == 0.
+    const APInt *V;
+    if (match(Inst->getOperand(1), m_APInt(V)))
+      return *V != 0;
+    return false;
+  }
  case Instruction::SDiv:
  case Instruction::SRem: {
-    Value *Op = Inst->getOperand(1);
-    // x / y is undefined if y == 0
-    if (!isKnownNonZero(Op, TD))
-      return false;
-    // x / y might be undefined if y == -1
-    unsigned BitWidth = getBitWidth(Op->getType(), TD);
-    if (BitWidth == 0)
-      return false;
-    APInt KnownZero(BitWidth, 0);
-    APInt KnownOne(BitWidth, 0);
-    computeKnownBits(Op, KnownZero, KnownOne, TD);
-    return !!KnownZero;
+    // x / y is undefined if y == 0 or x == INT_MIN and y == -1
+    const APInt *X, *Y;
+    if (match(Inst->getOperand(1), m_APInt(Y))) {
+      if (*Y != 0) {
+        if (*Y == -1) {
+          // The numerator can't be MinSignedValue if the denominator is -1.
+          if (match(Inst->getOperand(0), m_APInt(X)))
+            return !Y->isMinSignedValue();
+          // The numerator *might* be MinSignedValue.
+          return false;
+        }
+        // The denominator is not 0 or -1, it's safe to proceed.
+        return true;
+      }
+    }
+    return false;
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(Inst);
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@ -340,6 +340,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
    break;
  case Triple::mips:
  case Triple::mipsel:
+  case Triple::mips64:
+  case Triple::mips64el:
    // MIPS uses indirect pointer to refer personality functions, so that the
    // eh_frame section can be read-only.  DW.ref.personality will be generated
    // for relocation.
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@ -566,11 +566,59 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
    AFI->setShouldRestoreSPFromFP(true);
 }

+// Replace a trailing TCRETURNdi/TCRETURNri pseudo with the real tail-jump.
+void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
+                                   MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
+  unsigned RetOpcode = MBBI->getOpcode();
+  DebugLoc dl = MBBI->getDebugLoc();
+  const ARMBaseInstrInfo &TII =
+      *MF.getTarget().getSubtarget<ARMSubtarget>().getInstrInfo();
+
+  if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
+    return;
+
+  // Tail call return: build the jump to the callee in front of the pseudo.
+  MBBI = MBB.getLastNonDebugInstr();
+  MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+  // Jump to label or value in register.
+  if (RetOpcode == ARM::TCRETURNdi) {
+    unsigned TCOpcode = STI.isThumb() ?
+             (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
+             ARM::TAILJMPd;
+    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+    if (JumpTarget.isGlobal())
+      MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+                           JumpTarget.getTargetFlags());
+    else {
+      assert(JumpTarget.isSymbol());
+      MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+                            JumpTarget.getTargetFlags());
+    }
+
+    // Add the default predicate in Thumb mode.
+    if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
+  } else if (RetOpcode == ARM::TCRETURNri) {
+    BuildMI(MBB, MBBI, dl,
+            TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
+      addReg(JumpTarget.getReg(), RegState::Kill);
+  }
+
+  MachineInstr *NewMI = std::prev(MBBI);
+  for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+    NewMI->addOperand(MBBI->getOperand(i));
+
+  // Delete the pseudo instruction TCRETURN; MBBI below is a local copy only.
+  MBB.erase(MBBI);
+  MBBI = NewMI;
+}
+
 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
-  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc dl = MBBI->getDebugLoc();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@ -588,8 +636,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
-  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+  if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
+    fixTCReturn(MF, MBB);
    return;
+  }

  if (!AFI->hasStackFrame()) {
    if (NumBytes - ArgRegsSaveSize != 0)
@ -661,42 +711,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
  }

-  if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) {
-    // Tail call return: adjust the stack pointer and jump to callee.
-    MBBI = MBB.getLastNonDebugInstr();
-    MachineOperand &JumpTarget = MBBI->getOperand(0);
-
-    // Jump to label or value in register.
-    if (RetOpcode == ARM::TCRETURNdi) {
-      unsigned TCOpcode = STI.isThumb() ?
-               (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
-               ARM::TAILJMPd;
-      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
-      if (JumpTarget.isGlobal())
-        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
-                             JumpTarget.getTargetFlags());
-      else {
-        assert(JumpTarget.isSymbol());
-        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
-                              JumpTarget.getTargetFlags());
-      }
-
-      // Add the default predicate in Thumb mode.
-      if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
-    } else if (RetOpcode == ARM::TCRETURNri) {
-      BuildMI(MBB, MBBI, dl,
-              TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
-        addReg(JumpTarget.getReg(), RegState::Kill);
-    }
-
-    MachineInstr *NewMI = std::prev(MBBI);
-    for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
-      NewMI->addOperand(MBBI->getOperand(i));
-
-    // Delete the pseudo instruction TCRETURN.
-    MBB.erase(MBBI);
-    MBBI = NewMI;
-  }
+  fixTCReturn(MF, MBB);

  if (ArgRegsSaveSize)
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@ -31,6 +31,8 @@ public:
  void emitPrologue(MachineFunction &MF) const override;
  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;

+  void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI,
                                 const std::vector<CalleeSavedInfo> &CSI,
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@ -1521,7 +1521,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
      // True if this byval aggregate will be split between registers
      // and memory.
      unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
-      unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+      unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();

      if (CurByValIdx < ByValArgsCount) {

@ -2962,7 +2962,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
        if (Flags.isByVal()) {
          unsigned ExtraArgRegsSize;
          unsigned ExtraArgRegsSaveSize;
-          computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(),
+          computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(),
                         Flags.getByValSize(),
                         ExtraArgRegsSize, ExtraArgRegsSaveSize);

@ -3086,7 +3086,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
          // Since they could be overwritten by lowering of arguments in case of
          // a tail call.
          if (Flags.isByVal()) {
-            unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
+            unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();

            ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
            int FrameIndex = StoreByValRegs(
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@ -200,14 +200,14 @@ class MipsAsmParser : public MCTargetAsmParser {
  // Example: INSERT.B $w0[n], $1 => 16 > n >= 0
  bool validateMSAIndex(int Val, int RegKind);

-  void setFeatureBits(unsigned Feature, StringRef FeatureString) {
+  void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
    if (!(STI.getFeatureBits() & Feature)) {
      setAvailableFeatures(
          ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
    }
  }

-  void clearFeatureBits(unsigned Feature, StringRef FeatureString) {
+  void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
    if (STI.getFeatureBits() & Feature) {
      setAvailableFeatures(
          ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@ -22,8 +22,10 @@ add_llvm_target(MipsCodeGen
  Mips16ISelDAGToDAG.cpp
  Mips16ISelLowering.cpp
  Mips16RegisterInfo.cpp
+  MipsABIInfo.cpp
  MipsAnalyzeImmediate.cpp
  MipsAsmPrinter.cpp
+  MipsCCState.cpp
  MipsCodeEmitter.cpp
  MipsConstantIslandPass.cpp
  MipsDelaySlotFiller.cpp
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@ -250,6 +250,11 @@ static DecodeStatus DecodeMem(MCInst &Inst,
                              uint64_t Address,
                              const void *Decoder);

+static DecodeStatus DecodeCacheOp(MCInst &Inst,
+                              unsigned Insn,
+                              uint64_t Address,
+                              const void *Decoder);
+
 static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
                                    uint64_t Address, const void *Decoder);

@ -267,6 +272,14 @@ static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn,
                               uint64_t Address,
                               const void *Decoder);

+static DecodeStatus DecodeFMem2(MCInst &Inst, unsigned Insn,
+                               uint64_t Address,
+                               const void *Decoder);
+
+static DecodeStatus DecodeFMem3(MCInst &Inst, unsigned Insn,
+                               uint64_t Address,
+                               const void *Decoder);
+
 static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
                                       unsigned Insn,
                                       uint64_t Address,
@ -451,7 +464,7 @@ static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn,

  InsnType Rs = fieldFromInstruction(insn, 21, 5);
  InsnType Rt = fieldFromInstruction(insn, 16, 5);
-  InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+  InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
  bool HasRs = false;

  if (Rs >= Rt) {
@ -490,7 +503,7 @@ static DecodeStatus DecodeDaddiGroupBranch(MCInst &MI, InsnType insn,

  InsnType Rs = fieldFromInstruction(insn, 21, 5);
  InsnType Rt = fieldFromInstruction(insn, 16, 5);
-  InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+  InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
  bool HasRs = false;

  if (Rs >= Rt) {
@ -530,7 +543,7 @@ static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn,

  InsnType Rs = fieldFromInstruction(insn, 21, 5);
  InsnType Rt = fieldFromInstruction(insn, 16, 5);
-  InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+  InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
  bool HasRs = false;

  if (Rt == 0)
@ -575,7 +588,7 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn,

  InsnType Rs = fieldFromInstruction(insn, 21, 5);
  InsnType Rt = fieldFromInstruction(insn, 16, 5);
-  InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+  InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;

  if (Rt == 0)
    return MCDisassembler::Fail;
@ -617,7 +630,7 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn,

  InsnType Rs = fieldFromInstruction(insn, 21, 5);
  InsnType Rt = fieldFromInstruction(insn, 16, 5);
-  InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+  InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
  bool HasRs = false;
  bool HasRt = false;

@ -666,7 +679,7 @@ static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn,

  InsnType Rs = fieldFromInstruction(insn, 21, 5);
  InsnType Rt = fieldFromInstruction(insn, 16, 5);
-  InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+  InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
  bool HasRs = false;

  if (Rt == 0)
@ -964,6 +977,23 @@ static DecodeStatus DecodeMem(MCInst &Inst,
  return MCDisassembler::Success;
 }

+static DecodeStatus DecodeCacheOp(MCInst &Inst,
+                              unsigned Insn,
+                              uint64_t Address,
+                              const void *Decoder) {
+  int Offset = SignExtend32<16>(Insn & 0xffff);
+  unsigned Hint = fieldFromInstruction(Insn, 16, 5);
+  unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+  Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+  Inst.addOperand(MCOperand::CreateReg(Base));
+  Inst.addOperand(MCOperand::CreateImm(Offset));
+  Inst.addOperand(MCOperand::CreateImm(Hint));
+
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
                                    uint64_t Address, const void *Decoder) {
  int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 16, 10));
@ -995,15 +1025,15 @@ static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
    break;
  case Mips::LD_H:
  case Mips::ST_H:
-    Inst.addOperand(MCOperand::CreateImm(Offset << 1));
+    Inst.addOperand(MCOperand::CreateImm(Offset * 2));
    break;
  case Mips::LD_W:
  case Mips::ST_W:
-    Inst.addOperand(MCOperand::CreateImm(Offset << 2));
+    Inst.addOperand(MCOperand::CreateImm(Offset * 4));
    break;
  case Mips::LD_D:
  case Mips::ST_D:
-    Inst.addOperand(MCOperand::CreateImm(Offset << 3));
+    Inst.addOperand(MCOperand::CreateImm(Offset * 8));
    break;
  }

@ -1067,6 +1097,42 @@ static DecodeStatus DecodeFMem(MCInst &Inst,
  return MCDisassembler::Success;
 }

+static DecodeStatus DecodeFMem2(MCInst &Inst,
+                               unsigned Insn,
+                               uint64_t Address,
+                               const void *Decoder) {
+  int Offset = SignExtend32<16>(Insn & 0xffff);
+  unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+  unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+  Reg = getReg(Decoder, Mips::COP2RegClassID, Reg);
+  Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+  Inst.addOperand(MCOperand::CreateReg(Reg));
+  Inst.addOperand(MCOperand::CreateReg(Base));
+  Inst.addOperand(MCOperand::CreateImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFMem3(MCInst &Inst,
+                               unsigned Insn,
+                               uint64_t Address,
+                               const void *Decoder) {
+  int Offset = SignExtend32<16>(Insn & 0xffff);
+  unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+  unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+  Reg = getReg(Decoder, Mips::COP3RegClassID, Reg);
+  Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+  Inst.addOperand(MCOperand::CreateReg(Reg));
+  Inst.addOperand(MCOperand::CreateReg(Base));
+  Inst.addOperand(MCOperand::CreateImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
                                       unsigned Insn,
                                       uint64_t Address,
@ -1225,7 +1291,7 @@ static DecodeStatus DecodeBranchTarget(MCInst &Inst,
                                       unsigned Offset,
                                       uint64_t Address,
                                       const void *Decoder) {
-  int32_t BranchOffset = (SignExtend32<16>(Offset) << 2) + 4;
+  int32_t BranchOffset = (SignExtend32<16>(Offset) * 4) + 4;
  Inst.addOperand(MCOperand::CreateImm(BranchOffset));
  return MCDisassembler::Success;
 }
@ -1244,7 +1310,7 @@ static DecodeStatus DecodeBranchTarget21(MCInst &Inst,
                                         unsigned Offset,
                                         uint64_t Address,
                                         const void *Decoder) {
-  int32_t BranchOffset = SignExtend32<21>(Offset) << 2;
+  int32_t BranchOffset = SignExtend32<21>(Offset) * 4;

  Inst.addOperand(MCOperand::CreateImm(BranchOffset));
  return MCDisassembler::Success;
@ -1254,7 +1320,7 @@ static DecodeStatus DecodeBranchTarget26(MCInst &Inst,
                                         unsigned Offset,
                                         uint64_t Address,
                                         const void *Decoder) {
-  int32_t BranchOffset = SignExtend32<26>(Offset) << 2;
+  int32_t BranchOffset = SignExtend32<26>(Offset) * 4;

  Inst.addOperand(MCOperand::CreateImm(BranchOffset));
  return MCDisassembler::Success;
@ -1264,7 +1330,7 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
                                         unsigned Offset,
                                         uint64_t Address,
                                         const void *Decoder) {
-  int32_t BranchOffset = SignExtend32<16>(Offset) << 1;
+  int32_t BranchOffset = SignExtend32<16>(Offset) * 2;
  Inst.addOperand(MCOperand::CreateImm(BranchOffset));
  return MCDisassembler::Success;
 }
@ -1317,12 +1383,12 @@ static DecodeStatus DecodeExtSize(MCInst &Inst,

 static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
                                     uint64_t Address, const void *Decoder) {
-  Inst.addOperand(MCOperand::CreateImm(SignExtend32<19>(Insn) << 2));
+  Inst.addOperand(MCOperand::CreateImm(SignExtend32<19>(Insn) * 4));
  return MCDisassembler::Success;
 }

 static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn,
                                     uint64_t Address, const void *Decoder) {
-  Inst.addOperand(MCOperand::CreateImm(SignExtend32<18>(Insn) << 3));
+  Inst.addOperand(MCOperand::CreateImm(SignExtend32<18>(Insn) * 8));
  return MCDisassembler::Success;
 }
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@ -57,6 +57,8 @@ def MipsInstrInfo : InstrInfo;
 // Mips Subtarget features                                                    //
 //===----------------------------------------------------------------------===//

+def FeatureNoABICalls  : SubtargetFeature<"noabicalls", "NoABICalls", "true",
+                                "Disable SVR4-style position-independent code.">;
 def FeatureGP64Bit     : SubtargetFeature<"gp64", "IsGP64bit", "true",
                                "General Purpose Registers are 64-bit wide.">;
 def FeatureFP64Bit     : SubtargetFeature<"fp64", "IsFP64bit", "true",
@ -67,13 +69,13 @@ def FeatureNaN2008     : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
                                "IEEE 754-2008 NaN encoding.">;
 def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
                                "true", "Only supports single precision float">;
-def FeatureO32         : SubtargetFeature<"o32", "MipsABI", "O32",
+def FeatureO32         : SubtargetFeature<"o32", "ABI", "MipsABIInfo::O32()",
                                "Enable o32 ABI">;
-def FeatureN32         : SubtargetFeature<"n32", "MipsABI", "N32",
+def FeatureN32         : SubtargetFeature<"n32", "ABI", "MipsABIInfo::N32()",
                                "Enable n32 ABI">;
-def FeatureN64         : SubtargetFeature<"n64", "MipsABI", "N64",
+def FeatureN64         : SubtargetFeature<"n64", "ABI", "MipsABIInfo::N64()",
                                "Enable n64 ABI">;
-def FeatureEABI        : SubtargetFeature<"eabi", "MipsABI", "EABI",
+def FeatureEABI        : SubtargetFeature<"eabi", "ABI", "MipsABIInfo::EABI()",
                                "Enable eabi ABI">;
 def FeatureNoOddSPReg  : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false",
                              "Disable odd numbered single-precision "
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@ -241,10 +241,9 @@ Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  }
 }

-bool Mips16TargetLowering::
-isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
-                                  unsigned NextStackOffset,
-                                  const MipsFunctionInfo& FI) const {
+bool Mips16TargetLowering::isEligibleForTailCallOptimization(
+    const CCState &CCInfo, unsigned NextStackOffset,
+    const MipsFunctionInfo &FI) const {
  // No tail call optimization for mips16.
  return false;
 }
--- a/lib/Target/Mips/Mips16ISelLowering.h
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@ -30,9 +30,9 @@ namespace llvm {
                                MachineBasicBlock *MBB) const override;

  private:
-    bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
-                                     unsigned NextStackOffset,
-                                     const MipsFunctionInfo& FI) const override;
+    bool isEligibleForTailCallOptimization(
+        const CCState &CCInfo, unsigned NextStackOffset,
+        const MipsFunctionInfo &FI) const override;

    void setMips16HardFloatLibCalls();

--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@ -419,6 +419,10 @@ defm : SetgePats<GPR64, SLT64, SLTu64>;
 defm : SetgeImmPats<GPR64, SLTi64, SLTiu64>;

 // truncate
+def : MipsPat<(trunc (assertsext GPR64:$src)),
+              (EXTRACT_SUBREG GPR64:$src, sub_32)>;
+def : MipsPat<(trunc (assertzext GPR64:$src)),
+              (EXTRACT_SUBREG GPR64:$src, sub_32)>;
 def : MipsPat<(i32 (trunc GPR64:$src)),
              (SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>;

--- a/lib/Target/Mips/MipsABIInfo.cpp
+++ b/lib/Target/Mips/MipsABIInfo.cpp
@ -0,0 +1,45 @@
+//===---- MipsABIInfo.cpp - Information about MIPS ABI's ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsABIInfo.h"
+#include "MipsRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+static const MCPhysReg O32IntRegs[4] = {Mips::A0, Mips::A1, Mips::A2, Mips::A3};
+
+static const MCPhysReg Mips64IntRegs[8] = {
+    Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
+    Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64};
+}
+
+const ArrayRef<MCPhysReg> MipsABIInfo::GetByValArgRegs() const {
+  if (IsO32())
+    return makeArrayRef(O32IntRegs);
+  if (IsN32() || IsN64())
+    return makeArrayRef(Mips64IntRegs);
+  llvm_unreachable("Unhandled ABI");
+}
+
+const ArrayRef<MCPhysReg> MipsABIInfo::GetVarArgRegs() const {
+  if (IsO32())
+    return makeArrayRef(O32IntRegs);
+  if (IsN32() || IsN64())
+    return makeArrayRef(Mips64IntRegs);
+  llvm_unreachable("Unhandled ABI");
+}
+
+unsigned MipsABIInfo::GetCalleeAllocdArgSizeInBytes(CallingConv::ID CC) const {
+  if (IsO32())
+    return CC != CallingConv::Fast ? 16 : 0;
+  if (IsN32() || IsN64() || IsEABI())
+    return 0;
+  llvm_unreachable("Unhandled ABI");
+}
--- a/lib/Target/Mips/MipsABIInfo.h
+++ b/lib/Target/Mips/MipsABIInfo.h
@ -0,0 +1,61 @@
+//===---- MipsABIInfo.h - Information about MIPS ABI's --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSABIINFO_H
+#define MIPSABIINFO_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+class MipsABIInfo {
+public:
+  enum class ABI { Unknown, O32, N32, N64, EABI };
+
+protected:
+  ABI ThisABI;
+
+public:
+  MipsABIInfo(ABI ThisABI) : ThisABI(ThisABI) {}
+
+  static MipsABIInfo Unknown() { return MipsABIInfo(ABI::Unknown); }
+  static MipsABIInfo O32() { return MipsABIInfo(ABI::O32); }
+  static MipsABIInfo N32() { return MipsABIInfo(ABI::N32); }
+  static MipsABIInfo N64() { return MipsABIInfo(ABI::N64); }
+  static MipsABIInfo EABI() { return MipsABIInfo(ABI::EABI); }
+
+  bool IsKnown() const { return ThisABI != ABI::Unknown; }
+  bool IsO32() const { return ThisABI == ABI::O32; }
+  bool IsN32() const { return ThisABI == ABI::N32; }
+  bool IsN64() const { return ThisABI == ABI::N64; }
+  bool IsEABI() const { return ThisABI == ABI::EABI; }
+  ABI GetEnumValue() const { return ThisABI; }
+
+  /// The registers to use for byval arguments.
+  const ArrayRef<MCPhysReg> GetByValArgRegs() const;
+
+  /// The registers to use for the variable argument list.
+  const ArrayRef<MCPhysReg> GetVarArgRegs() const;
+
+  /// Obtain the size of the area allocated by the callee for arguments.
+  /// CallingConv::Fast affects the value for O32.
+  unsigned GetCalleeAllocdArgSizeInBytes(CallingConv::ID CC) const;
+
+  /// Ordering of ABI's
+  /// MipsGenSubtargetInfo.inc will use this to resolve conflicts when given
+  /// multiple ABI options.
+  bool operator<(const MipsABIInfo Other) const {
+    return ThisABI < Other.GetEnumValue();
+  }
+};
+}
+
+#endif
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@ -317,11 +317,11 @@ void MipsAsmPrinter::emitFrameDirective() {

 /// Emit Set directives.
 const char *MipsAsmPrinter::getCurrentABIString() const {
-  switch (Subtarget->getTargetABI()) {
-  case MipsSubtarget::O32:  return "abi32";
-  case MipsSubtarget::N32:  return "abiN32";
-  case MipsSubtarget::N64:  return "abi64";
-  case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
+  switch (Subtarget->getABI().GetEnumValue()) {
+  case MipsABIInfo::ABI::O32:  return "abi32";
+  case MipsABIInfo::ABI::N32:  return "abiN32";
+  case MipsABIInfo::ABI::N64:  return "abi64";
+  case MipsABIInfo::ABI::EABI: return "eabi32"; // TODO: handle eabi64
  default: llvm_unreachable("Unknown Mips ABI");
  }
 }
@ -471,14 +471,12 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
      return false;
    case 'z': {
      // $0 if zero, regular printing otherwise
-      if (MO.getType() != MachineOperand::MO_Immediate)
-        return true;
-      int64_t Val = MO.getImm();
-      if (Val)
-        O << Val;
-      else
+      if (MO.getType() == MachineOperand::MO_Immediate && MO.getImm() == 0) {
        O << "$0";
-      return false;
+        return false;
+      }
+      // If not, call printOperand as normal.
+      break;
    }
    case 'D': // Second part of a double word register operand
    case 'L': // Low order register of a double word register operand
@ -669,9 +667,7 @@ printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
 }

 void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
-  // TODO: Need to add -mabicalls and -mno-abicalls flags.
-  // Currently we assume that -mabicalls is the default.
-  bool IsABICalls = true;
+  bool IsABICalls = Subtarget->isABICalls();
  if (IsABICalls) {
    getTargetStreamer().emitDirectiveAbiCalls();
    Reloc::Model RM = TM.getRelocationModel();
--- a/lib/Target/Mips/MipsCCState.cpp
+++ b/lib/Target/Mips/MipsCCState.cpp
@ -0,0 +1,142 @@
+//===---- MipsCCState.cpp - CCState with Mips specific extensions ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsCCState.h"
+#include "MipsSubtarget.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+/// This function returns true if CallSym is a long double emulation routine.
+static bool isF128SoftLibCall(const char *CallSym) {
+  const char *const LibCalls[] = {
+      "__addtf3",      "__divtf3",     "__eqtf2",       "__extenddftf2",
+      "__extendsftf2", "__fixtfdi",    "__fixtfsi",     "__fixtfti",
+      "__fixunstfdi",  "__fixunstfsi", "__fixunstfti",  "__floatditf",
+      "__floatsitf",   "__floattitf",  "__floatunditf", "__floatunsitf",
+      "__floatuntitf", "__getf2",      "__gttf2",       "__letf2",
+      "__lttf2",       "__multf3",     "__netf2",       "__powitf2",
+      "__subtf3",      "__trunctfdf2", "__trunctfsf2",  "__unordtf2",
+      "ceill",         "copysignl",    "cosl",          "exp2l",
+      "expl",          "floorl",       "fmal",          "fmodl",
+      "log10l",        "log2l",        "logl",          "nearbyintl",
+      "powl",          "rintl",        "sinl",          "sqrtl",
+      "truncl"};
+
+  const char *const *End = LibCalls + array_lengthof(LibCalls);
+
+  // Check that LibCalls is sorted alphabetically.
+  MipsTargetLowering::LTStr Comp;
+
+#ifndef NDEBUG
+  for (const char *const *I = LibCalls; I < End - 1; ++I)
+    assert(Comp(*I, *(I + 1)));
+#endif
+
+  return std::binary_search(LibCalls, End, CallSym, Comp);
+}
+
+/// This function returns true if Ty is fp128, {f128} or i128 which was
+/// originally an fp128.
+static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) {
+  if (Ty->isFP128Ty())
+    return true;
+
+  if (Ty->isStructTy() && Ty->getStructNumElements() == 1 &&
+      Ty->getStructElementType(0)->isFP128Ty())
+    return true;
+
+  const ExternalSymbolSDNode *ES =
+      dyn_cast_or_null<const ExternalSymbolSDNode>(CallNode);
+
+  // If the Ty is i128 and the function being called is a long double emulation
+  // routine, then the original type is f128.
+  return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol()));
+}
+
+MipsCCState::SpecialCallingConvType
+MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee,
+                                            const MipsSubtarget &Subtarget) {
+  MipsCCState::SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv;
+  if (Subtarget.inMips16HardFloat()) {
+    if (const GlobalAddressSDNode *G =
+            dyn_cast<const GlobalAddressSDNode>(Callee)) {
+      llvm::StringRef Sym = G->getGlobal()->getName();
+      Function *F = G->getGlobal()->getParent()->getFunction(Sym);
+      if (F && F->hasFnAttribute("__Mips16RetHelper")) {
+        SpecialCallingConv = Mips16RetHelperConv;
+      }
+    }
+  }
+  return SpecialCallingConv;
+}
+
+void MipsCCState::PreAnalyzeCallResultForF128(
+    const SmallVectorImpl<ISD::InputArg> &Ins,
+    const TargetLowering::CallLoweringInfo &CLI) {
+  for (unsigned i = 0; i < Ins.size(); ++i) {
+    OriginalArgWasF128.push_back(
+        originalTypeIsF128(CLI.RetTy, CLI.Callee.getNode()));
+    OriginalArgWasFloat.push_back(CLI.RetTy->isFloatingPointTy());
+  }
+}
+
+/// Identify lowered values that originated from an f128 return value and
+/// record this for use by RetCC_MipsN.
+void MipsCCState::PreAnalyzeReturnForF128(
+    const SmallVectorImpl<ISD::OutputArg> &Outs) {
+  const MachineFunction &MF = getMachineFunction();
+  for (unsigned i = 0; i < Outs.size(); ++i) {
+    OriginalArgWasF128.push_back(
+        originalTypeIsF128(MF.getFunction()->getReturnType(), nullptr));
+    OriginalArgWasFloat.push_back(
+        MF.getFunction()->getReturnType()->isFloatingPointTy());
+  }
+}
+
+/// Identify lowered values that originated from f128 arguments and record
+/// this.
+void MipsCCState::PreAnalyzeCallOperands(
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    std::vector<TargetLowering::ArgListEntry> &FuncArgs,
+    const SDNode *CallNode) {
+  for (unsigned i = 0; i < Outs.size(); ++i) {
+    OriginalArgWasF128.push_back(
+        originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, CallNode));
+    OriginalArgWasFloat.push_back(
+        FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy());
+    CallOperandIsFixed.push_back(Outs[i].IsFixed);
+  }
+}
+
+/// Identify lowered values that originated from f128 arguments and record
+/// this.
+void MipsCCState::PreAnalyzeFormalArgumentsForF128(
+    const SmallVectorImpl<ISD::InputArg> &Ins) {
+  const MachineFunction &MF = getMachineFunction();
+  for (unsigned i = 0; i < Ins.size(); ++i) {
+    Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+
+    // SRet arguments cannot originate from f128 or {f128} returns so we just
+    // push false. We have to handle this specially since SRet arguments
+    // aren't mapped to an original argument.
+    if (Ins[i].Flags.isSRet()) {
+      OriginalArgWasF128.push_back(false);
+      OriginalArgWasFloat.push_back(false);
+      continue;
+    }
+
+    assert(Ins[i].OrigArgIndex < MF.getFunction()->arg_size());
+    std::advance(FuncArg, Ins[i].OrigArgIndex);
+
+    OriginalArgWasF128.push_back(
+        originalTypeIsF128(FuncArg->getType(), nullptr));
+    OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy());
+  }
+}
--- a/lib/Target/Mips/MipsCCState.h
+++ b/lib/Target/Mips/MipsCCState.h
@ -0,0 +1,137 @@
+//===---- MipsCCState.h - CCState with Mips specific extensions -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSCCSTATE_H
+#define MIPSCCSTATE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "MipsISelLowering.h"
+
+namespace llvm {
+class SDNode;
+class MipsSubtarget;
+
+class MipsCCState : public CCState {
+public:
+  enum SpecialCallingConvType { Mips16RetHelperConv, NoSpecialCallingConv };
+
+  /// Determine the SpecialCallingConvType for the given callee
+  static SpecialCallingConvType
+  getSpecialCallingConvForCallee(const SDNode *Callee,
+                                 const MipsSubtarget &Subtarget);
+
+private:
+  /// Identify lowered values that originated from f128 arguments and record
+  /// this for use by RetCC_MipsN.
+  void PreAnalyzeCallResultForF128(const SmallVectorImpl<ISD::InputArg> &Ins,
+                                   const TargetLowering::CallLoweringInfo &CLI);
+
+  /// Identify lowered values that originated from f128 arguments and record
+  /// this for use by RetCC_MipsN.
+  void PreAnalyzeReturnForF128(const SmallVectorImpl<ISD::OutputArg> &Outs);
+
+  /// Identify lowered values that originated from f128 arguments and record
+  /// this.
+  void
+  PreAnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                         std::vector<TargetLowering::ArgListEntry> &FuncArgs,
+                         const SDNode *CallNode);
+
+  /// Identify lowered values that originated from f128 arguments and record
+  /// this.
+  void
+  PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl<ISD::InputArg> &Ins);
+
+  /// Records whether the value has been lowered from an f128.
+  SmallVector<bool, 4> OriginalArgWasF128;
+
+  /// Records whether the value has been lowered from float.
+  SmallVector<bool, 4> OriginalArgWasFloat;
+
+  /// Records whether the value was a fixed argument.
+  /// See ISD::OutputArg::IsFixed.
+  SmallVector<bool, 4> CallOperandIsFixed;
+
+  // Used to handle MIPS16-specific calling convention tweaks.
+  // FIXME: This should probably be a fully fledged calling convention.
+  SpecialCallingConvType SpecialCallingConv;
+
+public:
+  MipsCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+              const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
+              LLVMContext &C,
+              SpecialCallingConvType SpecialCC = NoSpecialCallingConv)
+      : CCState(CC, isVarArg, MF, TM, locs, C), SpecialCallingConv(SpecialCC) {}
+
+  void
+  AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                      CCAssignFn Fn,
+                      std::vector<TargetLowering::ArgListEntry> &FuncArgs,
+                      const SDNode *CallNode) {
+    PreAnalyzeCallOperands(Outs, FuncArgs, CallNode);
+    CCState::AnalyzeCallOperands(Outs, Fn);
+    OriginalArgWasF128.clear();
+    OriginalArgWasFloat.clear();
+    CallOperandIsFixed.clear();
+  }
+
+  // The base-class AnalyzeCallOperands overloads are not usable since we must
+  // provide a means of accessing ArgListEntry::IsFixed. Delete them from this
+  // class. This doesn't stop them being used via the base class though.
+  void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           CCAssignFn Fn) LLVM_DELETED_FUNCTION;
+  void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs,
+                           SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+                           CCAssignFn Fn) LLVM_DELETED_FUNCTION;
+
+  void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+                              CCAssignFn Fn) {
+    PreAnalyzeFormalArgumentsForF128(Ins);
+    CCState::AnalyzeFormalArguments(Ins, Fn);
+    OriginalArgWasFloat.clear();
+    OriginalArgWasF128.clear();
+  }
+
+  void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+                         CCAssignFn Fn,
+                         const TargetLowering::CallLoweringInfo &CLI) {
+    PreAnalyzeCallResultForF128(Ins, CLI);
+    CCState::AnalyzeCallResult(Ins, Fn);
+    OriginalArgWasFloat.clear();
+    OriginalArgWasF128.clear();
+  }
+
+  void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                     CCAssignFn Fn) {
+    PreAnalyzeReturnForF128(Outs);
+    CCState::AnalyzeReturn(Outs, Fn);
+    OriginalArgWasFloat.clear();
+    OriginalArgWasF128.clear();
+  }
+
+  bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+                   CCAssignFn Fn) {
+    PreAnalyzeReturnForF128(ArgsFlags);
+    bool Return = CCState::CheckReturn(ArgsFlags, Fn);
+    OriginalArgWasFloat.clear();
+    OriginalArgWasF128.clear();
+    return Return;
+  }
+
+  bool WasOriginalArgF128(unsigned ValNo) { return OriginalArgWasF128[ValNo]; }
+  bool WasOriginalArgFloat(unsigned ValNo) {
+      return OriginalArgWasFloat[ValNo];
+  }
+  bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
+  SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
+};
+}
+
+#endif
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@ -10,8 +10,42 @@
 //===----------------------------------------------------------------------===//

 /// CCIfSubtarget - Match if the current subtarget has a feature F.
-class CCIfSubtarget<string F, CCAction A>:
-  CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>;
+class CCIfSubtarget<string F, CCAction A, string Invert = "">
+    : CCIf<!strconcat(Invert,
+                      "State.getMachineFunction().getTarget()."
+                          "getSubtarget<const MipsSubtarget>().",
+                      F),
+           A>;
+
+// The inverse of CCIfSubtarget
+class CCIfSubtargetNot<string F, CCAction A> : CCIfSubtarget<F, A, "!">;
+
+// For soft-float, f128 values are returned in A0_64 rather than V1_64.
+def RetCC_F128SoftFloat : CallingConv<[
+  CCAssignToReg<[V0_64, A0_64]>
+]>;
+
+// For hard-float, f128 values are returned as a pair of f64's rather than a
+// pair of i64's.
+def RetCC_F128HardFloat : CallingConv<[
+  CCBitConvertToType<f64>,
+
+  // Contrary to the ABI documentation, a struct containing a long double is
+  // returned in $f0 and $f1 instead of the usual $f0 and $f2. This is to
+  // match the de facto ABI as implemented by GCC.
+  CCIfInReg<CCAssignToReg<[D0_64, D1_64]>>,
+
+  CCAssignToReg<[D0_64, D2_64]>
+]>;
+
+// Handle F128 specially since we can't identify the original type during the
+// tablegen-erated code.
+def RetCC_F128 : CallingConv<[
+  CCIfSubtarget<"abiUsesSoftFloat()",
+      CCIfType<[i64], CCDelegateTo<RetCC_F128SoftFloat>>>,
+  CCIfSubtargetNot<"abiUsesSoftFloat()",
+      CCIfType<[i64], CCDelegateTo<RetCC_F128HardFloat>>>
+]>;

 //===----------------------------------------------------------------------===//
 // Mips O32 Calling Convention
@ -29,23 +63,43 @@ def RetCC_MipsO32 : CallingConv<[
  // f64 arguments are returned in D0_64 and D2_64 in FP64bit mode or
  // in D0 and D1 in FP32bit mode.
  CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCAssignToReg<[D0_64, D2_64]>>>,
-  CCIfType<[f64], CCIfSubtarget<"isNotFP64bit()", CCAssignToReg<[D0, D1]>>>
+  CCIfType<[f64], CCIfSubtargetNot<"isFP64bit()", CCAssignToReg<[D0, D1]>>>
+]>;
+
+def CC_MipsO32_FP32 : CustomCallingConv;
+def CC_MipsO32_FP64 : CustomCallingConv;
+
+def CC_MipsO32_FP : CallingConv<[
+  CCIfSubtargetNot<"isFP64bit()", CCDelegateTo<CC_MipsO32_FP32>>,
+  CCIfSubtarget<"isFP64bit()", CCDelegateTo<CC_MipsO32_FP64>>
 ]>;

 //===----------------------------------------------------------------------===//
 // Mips N32/64 Calling Convention
 //===----------------------------------------------------------------------===//

+def CC_MipsN_SoftFloat : CallingConv<[ // soft-float i32 args of CC_MipsN
+  CCAssignToRegWithShadow<[A0, A1, A2, A3,
+                           T0, T1, T2, T3],
+                          [D12_64, D13_64, D14_64, D15_64,
+                           D16_64, D17_64, D18_64, D19_64]>,
+  CCAssignToStack<4, 8>
+]>;
+
 def CC_MipsN : CallingConv<[
-  // Promote i8/i16 arguments to i32.
-  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+  CCIfType<[i8, i16, i32],
+      CCIfSubtargetNot<"isLittle()",
+          CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
+
+  // All integers (except soft-float integers) are promoted to 64-bit.
+  CCIfType<[i8, i16, i32],
+     CCIf<"!static_cast<MipsCCState *>(&State)->WasOriginalArgFloat(ValNo)",
+          CCPromoteToType<i64>>>,
+
+  // The only i32's we have left are soft-float arguments.
+  CCIfSubtarget<"abiUsesSoftFloat()", CCIfType<[i32], CCDelegateTo<CC_MipsN_SoftFloat>>>,

  // Integer arguments are passed in integer registers.
-  CCIfType<[i32], CCAssignToRegWithShadow<[A0, A1, A2, A3,
-                                           T0, T1, T2, T3],
-                                          [F12, F13, F14, F15,
-                                           F16, F17, F18, F19]>>,
-
  CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
                                           T0_64, T1_64, T2_64, T3_64],
                                          [D12_64, D13_64, D14_64, D15_64,
@ -64,29 +118,49 @@ def CC_MipsN : CallingConv<[
                                           T0_64, T1_64, T2_64, T3_64]>>,

  // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
-  CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+  CCIfType<[f32], CCAssignToStack<4, 8>>,
  CCIfType<[i64, f64], CCAssignToStack<8, 8>>
 ]>;

 // N32/64 variable arguments.
 // All arguments are passed in integer registers.
 def CC_MipsN_VarArg : CallingConv<[
-  // Promote i8/i16 arguments to i32.
-  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+  // All integers are promoted to 64-bit.
+  CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,

-  CCIfType<[i32, f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+  CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,

  CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64,
                                      T0_64, T1_64, T2_64, T3_64]>>,

  // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
-  CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+  CCIfType<[f32], CCAssignToStack<4, 8>>,
  CCIfType<[i64, f64], CCAssignToStack<8, 8>>
 ]>;

 def RetCC_MipsN : CallingConv<[
-  // i32 are returned in registers V0, V1
-  CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+  // f128 needs to be handled similarly to f32 and f64. However, f128 is not
+  // legal and is lowered to i128 which is further lowered to a pair of i64's.
+  // This presents us with a problem for the calling convention since hard-float
+  // still needs to pass them in FPU registers, and soft-float needs to use $v0
+  // and $a0 instead of the usual $v0 and $v1. We therefore resort to a
+  // pre-analyze (see PreAnalyzeReturnForF128()) step to pass information on
+  // whether the result was originally an f128 into the tablegen-erated code.
+  //
+  // f128 should only occur for the N64 ABI where long double is 128-bit. On
+  // N32, long double is equivalent to double.
+  CCIfType<[i64],
+      CCIf<"static_cast<MipsCCState *>(&State)->WasOriginalArgF128(ValNo)",
+           CCDelegateTo<RetCC_F128>>>,
+
+  // Aggregate returns are positioned at the lowest address in the slot for
+  // both little and big-endian targets. When passing in registers, this
+  // requires that big-endian targets shift the value into the upper bits.
+  CCIfSubtarget<"isLittle()",
+      CCIfType<[i8, i16, i32, i64], CCIfInReg<CCPromoteToType<i64>>>>,
+  CCIfSubtargetNot<"isLittle()",
+      CCIfType<[i8, i16, i32, i64],
+          CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,

  // i64 are returned in registers V0_64, V1_64
  CCIfType<[i64], CCAssignToReg<[V0_64, V1_64]>>,
@ -98,12 +172,6 @@ def RetCC_MipsN : CallingConv<[
  CCIfType<[f64], CCAssignToReg<[D0_64, D2_64]>>
 ]>;

-// In soft-mode, register A0_64, instead of V1_64, is used to return a long
-// double value.
-def RetCC_F128Soft : CallingConv<[
-  CCIfType<[i64], CCAssignToReg<[V0_64, A0_64]>>
-]>;
-
 //===----------------------------------------------------------------------===//
 // Mips EABI Calling Convention
 //===----------------------------------------------------------------------===//
@ -119,11 +187,11 @@ def CC_MipsEABI : CallingConv<[
  CCIfType<[f32], CCIfSubtarget<"isSingleFloat()",
                  CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>,

-  CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()",
+  CCIfType<[f32], CCIfSubtargetNot<"isSingleFloat()",
                  CCAssignToReg<[F12, F14, F16, F18]>>>,

  // The first 4 double fp arguments are passed in single fp registers.
-  CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()",
+  CCIfType<[f64], CCIfSubtargetNot<"isSingleFloat()",
                  CCAssignToReg<[D6, D7, D8, D9]>>>,

  // Integer values get stored in stack slots that are 4 bytes in
@ -132,7 +200,7 @@ def CC_MipsEABI : CallingConv<[

  // Integer values get stored in stack slots that are 8 bytes in
  // size and 8-byte aligned.
-  CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToStack<8, 8>>>
+  CCIfType<[f64], CCIfSubtargetNot<"isSingleFloat()", CCAssignToStack<8, 8>>>
 ]>;

 def RetCC_MipsEABI : CallingConv<[
@ -143,7 +211,7 @@ def RetCC_MipsEABI : CallingConv<[
  CCIfType<[f32], CCAssignToReg<[F0, F1]>>,

  // f64 are returned in register D0
-  CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
+  CCIfType<[f64], CCIfSubtargetNot<"isSingleFloat()", CCAssignToReg<[D0]>>>
 ]>;

 //===----------------------------------------------------------------------===//
@ -151,16 +219,20 @@ def RetCC_MipsEABI : CallingConv<[
 //===----------------------------------------------------------------------===//
 def CC_MipsO32_FastCC : CallingConv<[
  // f64 arguments are passed in double-precision floating pointer registers.
-  CCIfType<[f64], CCIfSubtarget<"isNotFP64bit()",
-                                CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7,
-                                               D8, D9]>>>,
-  CCIfType<[f64], CCIfSubtarget<"isFP64bit()",
+  CCIfType<[f64], CCIfSubtargetNot<"isFP64bit()",
+                                   CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6,
+                                                  D7, D8, D9]>>>,
+  CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCIfSubtarget<"useOddSPReg()",
                                CCAssignToReg<[D0_64, D1_64, D2_64, D3_64,
                                               D4_64, D5_64, D6_64, D7_64,
                                               D8_64, D9_64, D10_64, D11_64,
                                               D12_64, D13_64, D14_64, D15_64,
                                               D16_64, D17_64, D18_64,
-                                               D19_64]>>>,
+                                               D19_64]>>>>,
+  CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCIfSubtarget<"noOddSPReg()",
+                                CCAssignToReg<[D0_64, D2_64, D4_64, D6_64,
+                                               D8_64, D10_64, D12_64, D14_64,
+                                               D16_64, D18_64]>>>>,

  // Stack parameter slots for f64 are 64-bit doublewords and 8-byte aligned.
  CCIfType<[f64], CCAssignToStack<8, 8>>
@ -192,7 +264,7 @@ def CC_Mips_FastCC : CallingConv<[

  // Integer arguments are passed in integer registers. All scratch registers,
  // except for AT, V0 and T9, are available to be used as argument registers.
-  CCIfType<[i32], CCIfSubtarget<"isNotTargetNaCl()",
+  CCIfType<[i32], CCIfSubtargetNot<"isTargetNaCl()",
      CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, V1]>>>,

  // In NaCl, T6, T7 and T8 are reserved and not available as argument
@ -219,13 +291,6 @@ def CC_Mips_FastCC : CallingConv<[
  CCDelegateTo<CC_MipsN_FastCC>
 ]>;

-//==
-
-def CC_Mips16RetHelper : CallingConv<[
-  // Integer arguments are passed in integer registers.
-  CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>
-]>;
-
 //===----------------------------------------------------------------------===//
 // Mips Calling Convention Dispatch
 //===----------------------------------------------------------------------===//
@ -237,6 +302,66 @@ def RetCC_Mips : CallingConv<[
  CCDelegateTo<RetCC_MipsO32>
 ]>;

+def CC_Mips_ByVal : CallingConv<[
+  CCIfSubtarget<"isABI_O32()", CCIfByVal<CCPassByVal<4, 4>>>,
+  CCIfByVal<CCPassByVal<8, 8>>
+]>;
+
+def CC_Mips16RetHelper : CallingConv<[
+  CCIfByVal<CCDelegateTo<CC_Mips_ByVal>>,
+
+  // Integer arguments are passed in integer registers.
+  CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>
+]>;
+
+def CC_Mips_FixedArg : CallingConv<[
+  // Mips16 needs special handling on some functions.
+  CCIf<"State.getCallingConv() != CallingConv::Fast",
+      CCIf<"static_cast<MipsCCState *>(&State)->getSpecialCallingConv() == "
+               "MipsCCState::Mips16RetHelperConv",
+           CCDelegateTo<CC_Mips16RetHelper>>>,
+
+  CCIfByVal<CCDelegateTo<CC_Mips_ByVal>>,
+
+  // f128 needs to be handled similarly to f32 and f64 on hard-float. However,
+  // f128 is not legal and is lowered to i128 which is further lowered to a pair
+  // of i64's.
+  // This presents us with a problem for the calling convention since hard-float
+  // still needs to pass them in FPU registers. We therefore resort to a
+  // pre-analyze (see PreAnalyzeFormalArgsForF128()) step to pass information on
+  // whether the argument was originally an f128 into the tablegen-erated code.
+  //
+  // f128 should only occur for the N64 ABI where long double is 128-bit. On
+  // N32, long double is equivalent to double.
+  CCIfType<[i64],
+      CCIfSubtargetNot<"abiUsesSoftFloat()",
+          CCIf<"static_cast<MipsCCState *>(&State)->WasOriginalArgF128(ValNo)",
+              CCBitConvertToType<f64>>>>,
+
+  CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_Mips_FastCC>>,
+
+  // FIXME: There wasn't an EABI case in the original code and it seems unlikely
+  //        that it's the same as CC_MipsN
+  CCIfSubtarget<"isABI_O32()", CCDelegateTo<CC_MipsO32_FP>>,
+  CCDelegateTo<CC_MipsN>
+]>;
+
+def CC_Mips_VarArg : CallingConv<[
+  CCIfByVal<CCDelegateTo<CC_Mips_ByVal>>,
+
+  // FIXME: There wasn't an EABI case in the original code and it seems unlikely
+  //        that it's the same as CC_MipsN_VarArg
+  CCIfSubtarget<"isABI_O32()", CCDelegateTo<CC_MipsO32_FP>>,
+  CCDelegateTo<CC_MipsN_VarArg>
+]>;
+
+def CC_Mips : CallingConv<[
+  CCIfVarArg< // Variadic (non-fixed) operands use the vararg rules.
+      CCIf<"!static_cast<MipsCCState *>(&State)->IsCallOperandFixed(ValNo)",
+          CCDelegateTo<CC_Mips_VarArg>>>,
+  CCDelegateTo<CC_Mips_FixedArg> // Otherwise use the fixed-argument rules.
+]>;
+
 //===----------------------------------------------------------------------===//
 // Callee-saved register lists.
 //===----------------------------------------------------------------------===//
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@ -343,7 +343,6 @@ namespace {

  const TargetMachine &TM;
  bool IsPIC;
-  unsigned ABI;
  const MipsSubtarget *STI;
  const Mips16InstrInfo *TII;
  MipsFunctionInfo *MFI;
@ -366,8 +365,7 @@ namespace {
    static char ID;
    MipsConstantIslands(TargetMachine &tm)
        : MachineFunctionPass(ID), TM(tm),
-          IsPIC(TM.getRelocationModel() == Reloc::PIC_),
-          ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()), STI(nullptr),
+          IsPIC(TM.getRelocationModel() == Reloc::PIC_), STI(nullptr),
          MF(nullptr), MCP(nullptr), PrescannedForConstants(false) {}

    const char *getPassName() const override {
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@ -210,6 +210,7 @@ namespace llvm {
  //===--------------------------------------------------------------------===//
  class MipsFunctionInfo;
  class MipsSubtarget;
+  class MipsCCState;

  class MipsTargetLowering : public TargetLowering  {
    bool isMicroMips;
@ -259,6 +260,8 @@ namespace llvm {
      }
    };

+    void HandleByVal(CCState *, unsigned &, unsigned) const override;
+
  protected:
    SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;

@ -338,101 +341,6 @@ namespace llvm {
                bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
                CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;

-    /// ByValArgInfo - Byval argument information.
-    struct ByValArgInfo {
-      unsigned FirstIdx; // Index of the first register used.
-      unsigned NumRegs;  // Number of registers used for this argument.
-      unsigned Address;  // Offset of the stack area used to pass this argument.
-
-      ByValArgInfo() : FirstIdx(0), NumRegs(0), Address(0) {}
-    };
-
-    /// MipsCC - This class provides methods used to analyze formal and call
-    /// arguments and inquire about calling convention information.
-    class MipsCC {
-    public:
-      enum SpecialCallingConvType {
-        Mips16RetHelperConv, NoSpecialCallingConv
-      };
-
-      MipsCC(CallingConv::ID CallConv, bool IsO32, bool IsFP64, CCState &Info,
-             SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv);
-
-
-      void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
-                               bool IsVarArg, bool IsSoftFloat,
-                               const SDNode *CallNode,
-                               std::vector<ArgListEntry> &FuncArgs);
-      void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
-                                  bool IsSoftFloat,
-                                  Function::const_arg_iterator FuncArg);
-
-      void analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
-                             bool IsSoftFloat, const SDNode *CallNode,
-                             const Type *RetTy) const;
-
-      void analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
-                         bool IsSoftFloat, const Type *RetTy) const;
-
-      const CCState &getCCInfo() const { return CCInfo; }
-
-      /// hasByValArg - Returns true if function has byval arguments.
-      bool hasByValArg() const { return !ByValArgs.empty(); }
-
-      /// regSize - Size (in number of bits) of integer registers.
-      unsigned regSize() const { return IsO32 ? 4 : 8; }
-
-      /// numIntArgRegs - Number of integer registers available for calls.
-      unsigned numIntArgRegs() const;
-
-      /// reservedArgArea - The size of the area the caller reserves for
-      /// register arguments. This is 16-byte if ABI is O32.
-      unsigned reservedArgArea() const;
-
-      /// Return pointer to array of integer argument registers.
-      const MCPhysReg *intArgRegs() const;
-
-      typedef SmallVectorImpl<ByValArgInfo>::const_iterator byval_iterator;
-      byval_iterator byval_begin() const { return ByValArgs.begin(); }
-      byval_iterator byval_end() const { return ByValArgs.end(); }
-
-    private:
-      void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT,
-                          CCValAssign::LocInfo LocInfo,
-                          ISD::ArgFlagsTy ArgFlags);
-
-      /// useRegsForByval - Returns true if the calling convention allows the
-      /// use of registers to pass byval arguments.
-      bool useRegsForByval() const { return CallConv != CallingConv::Fast; }
-
-      /// Return the function that analyzes fixed argument list functions.
-      llvm::CCAssignFn *fixedArgFn() const;
-
-      /// Return the function that analyzes variable argument list functions.
-      llvm::CCAssignFn *varArgFn() const;
-
-      const MCPhysReg *shadowRegs() const;
-
-      void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize,
-                        unsigned Align);
-
-      /// Return the type of the register which is used to pass an argument or
-      /// return a value. This function returns f64 if the argument is an i64
-      /// value which has been generated as a result of softening an f128 value.
-      /// Otherwise, it just returns VT.
-      MVT getRegVT(MVT VT, const Type *OrigTy, const SDNode *CallNode,
-                   bool IsSoftFloat) const;
-
-      template<typename Ty>
-      void analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
-                         const SDNode *CallNode, const Type *RetTy) const;
-
-      CCState &CCInfo;
-      CallingConv::ID CallConv;
-      bool IsO32, IsFP64;
-      SpecialCallingConvType SpecialCallingConv;
-      SmallVector<ByValArgInfo, 2> ByValArgs;
-    };
  protected:
    SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
@ -461,14 +369,12 @@ namespace llvm {
    SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                          unsigned Flag) const;

-    MipsCC::SpecialCallingConvType getSpecialCallingConv(SDValue Callee) const;
    // Lower Operand helpers
    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
-                            const SmallVectorImpl<ISD::InputArg> &Ins,
-                            SDLoc dl, SelectionDAG &DAG,
-                            SmallVectorImpl<SDValue> &InVals,
-                            const SDNode *CallNode, const Type *RetTy) const;
+                            const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl,
+                            SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+                            TargetLowering::CallLoweringInfo &CLI) const;

    // Lower Operand specifics
    SDValue lowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
@ -482,6 +388,7 @@ namespace llvm {
    SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
@ -497,33 +404,34 @@ namespace llvm {
    /// isEligibleForTailCallOptimization - Check whether the call is eligible
    /// for tail call optimization.
    virtual bool
-    isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+    isEligibleForTailCallOptimization(const CCState &CCInfo,
                                      unsigned NextStackOffset,
-                                      const MipsFunctionInfo& FI) const = 0;
+                                      const MipsFunctionInfo &FI) const = 0;

    /// copyByValArg - Copy argument registers which were used to pass a byval
    /// argument to the stack. Create a stack frame object for the byval
    /// argument.
-    void copyByValRegs(SDValue Chain, SDLoc DL,
-                       std::vector<SDValue> &OutChains, SelectionDAG &DAG,
-                       const ISD::ArgFlagsTy &Flags,
+    void copyByValRegs(SDValue Chain, SDLoc DL, std::vector<SDValue> &OutChains,
+                       SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags,
                       SmallVectorImpl<SDValue> &InVals,
-                       const Argument *FuncArg,
-                       const MipsCC &CC, const ByValArgInfo &ByVal) const;
+                       const Argument *FuncArg, unsigned FirstReg,
+                       unsigned LastReg, const CCValAssign &VA,
+                       MipsCCState &State) const;

    /// passByValArg - Pass a byval argument in registers or on stack.
    void passByValArg(SDValue Chain, SDLoc DL,
-                      std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+                      std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
                      SmallVectorImpl<SDValue> &MemOpChains, SDValue StackPtr,
                      MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
-                      const MipsCC &CC, const ByValArgInfo &ByVal,
-                      const ISD::ArgFlagsTy &Flags, bool isLittle) const;
+                      unsigned FirstReg, unsigned LastReg,
+                      const ISD::ArgFlagsTy &Flags, bool isLittle,
+                      const CCValAssign &VA) const;

    /// writeVarArgRegs - Write variable function arguments passed in registers
    /// to the stack. Also create a stack frame object for the first variable
    /// argument.
-    void writeVarArgRegs(std::vector<SDValue> &OutChains, const MipsCC &CC,
-                         SDValue Chain, SDLoc DL, SelectionDAG &DAG) const;
+    void writeVarArgRegs(std::vector<SDValue> &OutChains, SDValue Chain,
+                         SDLoc DL, SelectionDAG &DAG, CCState &State) const;

    SDValue
      LowerFormalArguments(SDValue Chain,
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@ -178,6 +178,38 @@ class SW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
  let mayStore = 1;
 }

+class SW_FT2<string opstr, RegisterOperand RC, InstrItinClass Itin,
+            SDPatternOperator OpNode= null_frag> :
+  InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+         [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr> {
+  let DecoderMethod = "DecodeFMem2"; // Decoder hook; used by SWC2 and SDC2.
+  let mayStore = 1;
+}
+
+class LW_FT2<string opstr, RegisterOperand RC, InstrItinClass Itin,
+            SDPatternOperator OpNode= null_frag> :
+  InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+         [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr> {
+  let DecoderMethod = "DecodeFMem2"; // Decoder hook; used by LWC2 and LDC2.
+  let mayLoad = 1;
+}
+
+class SW_FT3<string opstr, RegisterOperand RC, InstrItinClass Itin,
+            SDPatternOperator OpNode= null_frag> :
+  InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+         [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr> {
+  let DecoderMethod = "DecodeFMem3"; // Decoder hook; used by SWC3 and SDC3.
+  let mayStore = 1;
+}
+
+class LW_FT3<string opstr, RegisterOperand RC, InstrItinClass Itin,
+            SDPatternOperator OpNode= null_frag> :
+  InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+         [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr> {
+  let DecoderMethod = "DecodeFMem3"; // Decoder hook; used by LWC3 and LDC3.
+  let mayLoad = 1;
+}
+
 class MADDS_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
               SDPatternOperator OpNode = null_frag> :
  InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
@ -407,24 +439,24 @@ def SDC1 : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>, LW_FM<0x3d>,
 // Cop2 Memory Instructions
 // FIXME: These aren't really FPU instructions and as such don't belong in this
 //        file
-def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>,
+def LWC2 : LW_FT2<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>,
           ISA_MIPS1_NOT_32R6_64R6;
-def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>,
+def SWC2 : SW_FT2<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>,
           ISA_MIPS1_NOT_32R6_64R6;
-def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>,
+def LDC2 : LW_FT2<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>,
           ISA_MIPS2_NOT_32R6_64R6;
-def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>,
+def SDC2 : SW_FT2<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>,
           ISA_MIPS2_NOT_32R6_64R6;

 // Cop3 Memory Instructions
 // FIXME: These aren't really FPU instructions and as such don't belong in this
 //        file
 let DecoderNamespace = "COP3_" in {
-  def LWC3 : LW_FT<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>;
-  def SWC3 : SW_FT<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>;
-  def LDC3 : LW_FT<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>,
+  def LWC3 : LW_FT3<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>;
+  def SWC3 : SW_FT3<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>;
+  def LDC3 : LW_FT3<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>,
             ISA_MIPS2;
-  def SDC3 : SW_FT<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>,
+  def SDC3 : SW_FT3<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>,
             ISA_MIPS2;
 }

--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@ -1414,13 +1414,15 @@ def TLBR : TLB<"tlbr">, COP0_TLB_FM<0x01>;
 def TLBWI : TLB<"tlbwi">, COP0_TLB_FM<0x02>;
 def TLBWR : TLB<"tlbwr">, COP0_TLB_FM<0x06>;

-class CacheOp<string instr_asm, Operand MemOpnd, RegisterOperand GPROpnd> :
+class CacheOp<string instr_asm, Operand MemOpnd> :
    InstSE<(outs), (ins  MemOpnd:$addr, uimm5:$hint),
-           !strconcat(instr_asm, "\t$hint, $addr"), [], NoItinerary, FrmOther>;
+           !strconcat(instr_asm, "\t$hint, $addr"), [], NoItinerary, FrmOther> {
+  let DecoderMethod = "DecodeCacheOp";
+}

-def CACHE : CacheOp<"cache", mem, GPR32Opnd>, CACHEOP_FM<0b101111>,
+def CACHE : CacheOp<"cache", mem>, CACHEOP_FM<0b101111>,
            INSN_MIPS3_32_NOT_32R6_64R6;
-def PREF :  CacheOp<"pref", mem, GPR32Opnd>, CACHEOP_FM<0b110011>,
+def PREF :  CacheOp<"pref", mem>, CACHEOP_FM<0b110011>,
            INSN_MIPS3_32_NOT_32R6_64R6;

 //===----------------------------------------------------------------------===//
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@ -64,8 +64,8 @@ namespace {
    MipsLongBranch(TargetMachine &tm)
      : MachineFunctionPass(ID), TM(tm),
        IsPIC(TM.getRelocationModel() == Reloc::PIC_),
-        ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()),
-        LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 10 :
+        ABI(TM.getSubtarget<MipsSubtarget>().getABI()),
+        LongBranchSeqSize(!IsPIC ? 2 : (ABI.IsN64() ? 10 :
            (!TM.getSubtarget<MipsSubtarget>().isTargetNaCl() ? 9 : 10))) {}

    const char *getPassName() const override {
@ -86,7 +86,7 @@ namespace {
    MachineFunction *MF;
    SmallVector<MBBInfo, 16> MBBInfos;
    bool IsPIC;
-    unsigned ABI;
+    MipsABIInfo ABI;
    unsigned LongBranchSeqSize;
  };

@ -273,7 +273,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
    const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
    unsigned BalOp = Subtarget.hasMips32r6() ? Mips::BAL : Mips::BAL_BR;

-    if (ABI != MipsSubtarget::N64) {
+    if (!ABI.IsN64()) {
      // $longbr:
      //  addiu $sp, $sp, -8
      //  sw $ra, 0($sp)
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@ -149,6 +149,12 @@ getReservedRegs(const MachineFunction &MF) const {
  for (unsigned I = 0; I < array_lengthof(ReservedGPR64); ++I)
    Reserved.set(ReservedGPR64[I]);

+  // For -mno-abicalls, GP is a program invariant, so it must stay reserved.
+  if (!Subtarget.isABICalls()) {
+    Reserved.set(Mips::GP);
+    Reserved.set(Mips::GP_64);
+  }
+
  if (Subtarget.isFP64bit()) {
    // Reserve all registers in AFGR64.
    for (RegIter Reg = Mips::AFGR64RegClass.begin(),
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@ -325,6 +325,8 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
    // We re-use the same spill slot each time so that the stack frame doesn't
    // grow too much in functions with a large number of moves.
    int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC2);
+    if (!Subtarget.isLittle())
+      std::swap(LoReg, HiReg);
    TII.storeRegToStack(MBB, I, LoReg, I->getOperand(1).isKill(), FI, RC, &TRI,
                        0);
    TII.storeRegToStack(MBB, I, HiReg, I->getOperand(2).isKill(), FI, RC, &TRI,
@ -369,6 +371,7 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
    unsigned DstReg = I->getOperand(0).getReg();
    unsigned SrcReg = I->getOperand(1).getReg();
    unsigned N = I->getOperand(2).getImm();
+    int64_t Offset = 4 * (Subtarget.isLittle() ? N : (1 - N));

    // It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are
    // the cases where mfhc1 is not available). 64-bit architectures and
@ -385,7 +388,7 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
    int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC);
    TII.storeRegToStack(MBB, I, SrcReg, I->getOperand(1).isKill(), FI, RC, &TRI,
                        0);
-    TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &TRI, N * 4);
+    TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &TRI, Offset);
    return true;
  }

--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@ -1167,15 +1167,14 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  }
 }

-bool MipsSETargetLowering::
-isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
-                                  unsigned NextStackOffset,
-                                  const MipsFunctionInfo& FI) const {
+bool MipsSETargetLowering::isEligibleForTailCallOptimization(
+    const CCState &CCInfo, unsigned NextStackOffset,
+    const MipsFunctionInfo &FI) const {
  if (!EnableMipsTailCalls)
    return false;

  // Return false if either the callee or caller has a byval argument.
-  if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
+  if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
    return false;

  // Return true if the callee's argument area is no larger than the
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@ -50,9 +50,9 @@ namespace llvm {
    const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;

  private:
-    bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
-                                     unsigned NextStackOffset,
-                                     const MipsFunctionInfo& FI) const override;
+    bool isEligibleForTailCallOptimization(
+        const CCState &CCInfo, unsigned NextStackOffset,
+        const MipsFunctionInfo &FI) const override;

    void
    getOpndList(SmallVectorImpl<SDValue> &Ops,
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@ -106,13 +106,14 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
                             const std::string &FS, bool little,
                             MipsTargetMachine *_TM)
    : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32),
-      MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false),
-      IsFPXX(false), IsFP64bit(false), UseOddSPReg(true), IsNaN2008bit(false),
-      IsGP64bit(false), HasVFPU(false), HasCnMips(false), IsLinux(true),
-      HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
-      HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
-      InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
-      HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
+      ABI(MipsABIInfo::Unknown()), IsLittle(little), IsSingleFloat(false),
+      IsFPXX(false), NoABICalls(false), IsFP64bit(false), UseOddSPReg(true),
+      IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false), HasCnMips(false),
+      IsLinux(true), HasMips3_32(false), HasMips3_32r2(false),
+      HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false),
+      InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
+      InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
+      AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
      HasMSA(false), TM(_TM), TargetTriple(TT),
      DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS, TM))),
      TSInfo(DL), JITInfo(), InstrInfo(MipsInstrInfo::create(*this)),
@ -135,7 +136,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
    report_fatal_error("Code generation for MIPS-V is not implemented", false);

  // Assert exactly one ABI was chosen.
-  assert(MipsABI != UnknownABI);
+  assert(ABI.IsKnown());
  assert((((getFeatureBits() & Mips::FeatureO32) != 0) +
          ((getFeatureBits() & Mips::FeatureEABI) != 0) +
          ((getFeatureBits() & Mips::FeatureN32) != 0) +
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@ -23,6 +23,7 @@
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include "MipsABIInfo.h"
 #include <string>

 #define GET_SUBTARGETINFO_HEADER
@ -36,13 +37,6 @@ class MipsTargetMachine;
 class MipsSubtarget : public MipsGenSubtargetInfo {
  virtual void anchor();

-public:
-  // NOTE: O64 will not be supported.
-  enum MipsABIEnum {
-    UnknownABI, O32, N32, N64, EABI
-  };
-
-protected:
  enum MipsArchEnum {
    Mips1, Mips2, Mips32, Mips32r2, Mips32r6, Mips3, Mips4, Mips5, Mips64,
    Mips64r2, Mips64r6
@ -51,8 +45,8 @@ protected:
  // Mips architecture version
  MipsArchEnum MipsArchVersion;

-  // Mips supported ABIs
-  MipsABIEnum MipsABI;
+  // Selected ABI
+  MipsABIInfo ABI;

  // IsLittle - The target is Little Endian
  bool IsLittle;
@ -65,6 +59,9 @@ protected:
  // IsFPXX - MIPS O32 modeless ABI.
  bool IsFPXX;

+  // NoABICalls - Disable SVR4-style position-independent code.
+  bool NoABICalls;
+
  // IsFP64bit - The target processor has 64-bit floating point registers.
  bool IsFP64bit;

@ -157,12 +154,12 @@ public:
  CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override;

  /// Only O32 and EABI supported right now.
-  bool isABI_EABI() const { return MipsABI == EABI; }
-  bool isABI_N64() const { return MipsABI == N64; }
-  bool isABI_N32() const { return MipsABI == N32; }
-  bool isABI_O32() const { return MipsABI == O32; }
+  bool isABI_EABI() const { return ABI.IsEABI(); }
+  bool isABI_N64() const { return ABI.IsN64(); }
+  bool isABI_N32() const { return ABI.IsN32(); }
+  bool isABI_O32() const { return ABI.IsO32(); }
  bool isABI_FPXX() const { return isABI_O32() && IsFPXX; }
-  unsigned getTargetABI() const { return MipsABI; }
+  const MipsABIInfo &getABI() const { return ABI; }

  /// This constructor initializes the data members to match that
  /// of the specified triple.
@ -200,16 +197,16 @@ public:
  bool hasCnMips() const { return HasCnMips; }

  bool isLittle() const { return IsLittle; }
+  bool isABICalls() const { return !NoABICalls; }
  bool isFPXX() const { return IsFPXX; }
  bool isFP64bit() const { return IsFP64bit; }
  bool useOddSPReg() const { return UseOddSPReg; }
  bool noOddSPReg() const { return !UseOddSPReg; }
  bool isNaN2008() const { return IsNaN2008bit; }
-  bool isNotFP64bit() const { return !IsFP64bit; }
  bool isGP64bit() const { return IsGP64bit; }
  bool isGP32bit() const { return !IsGP64bit; }
+  unsigned getGPRSizeInBytes() const { return isGP64bit() ? 8 : 4; }
  bool isSingleFloat() const { return IsSingleFloat; }
-  bool isNotSingleFloat() const { return !IsSingleFloat; }
  bool hasVFPU() const { return HasVFPU; }
  bool inMips16Mode() const { return InMips16Mode; }
  bool inMips16ModeDefault() const {
@ -248,7 +245,6 @@ public:
  bool os16() const { return Os16;};

  bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
-  bool isNotTargetNaCl() const { return !TargetTriple.isOSNaCl(); }

  // for now constant islands are on for the whole compilation unit but we only
  // really use them if in addition we are in mips16 mode
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@ -17,6 +17,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOpcodes.h"
@ -260,7 +261,7 @@ void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
  if (!MI->getOperand(OpNo).isImm())
    return printOperand(MI, OpNo, O);

-  O << (int)MI->getOperand(OpNo).getImm()*4;
+  O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
 }


@ -308,10 +309,16 @@ void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,

 void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
                                  raw_ostream &O) {
-  printBranchOperand(MI, OpNo, O);
+  // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
+  // come at the _end_ of the expression.
+  const MCOperand &Op = MI->getOperand(OpNo);
+  const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr());
+  O << refExp.getSymbol().getName();
  O << '(';
  printOperand(MI, OpNo+1, O);
  O << ')';
+  if (refExp.getKind() != MCSymbolRefExpr::VK_None)
+    O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind());
 }


--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@ -236,7 +236,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
        Type = ELF::R_PPC64_DTPREL16_HIGHESTA;
        break;
      case MCSymbolRefExpr::VK_PPC_GOT_TLSGD:
-        Type = ELF::R_PPC64_GOT_TLSGD16;
+        if (is64Bit())
+          Type = ELF::R_PPC64_GOT_TLSGD16;
+        else
+          Type = ELF::R_PPC_GOT_TLSGD16;
        break;
      case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO:
        Type = ELF::R_PPC64_GOT_TLSGD16_LO;
@ -248,7 +251,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
        Type = ELF::R_PPC64_GOT_TLSGD16_HA;
        break;
      case MCSymbolRefExpr::VK_PPC_GOT_TLSLD:
-        Type = ELF::R_PPC64_GOT_TLSLD16;
+        if (is64Bit())
+          Type = ELF::R_PPC64_GOT_TLSLD16;
+        else
+          Type = ELF::R_PPC_GOT_TLSLD16;
        break;
      case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO:
        Type = ELF::R_PPC64_GOT_TLSLD16_LO;
@ -344,13 +350,22 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
      switch (Modifier) {
      default: llvm_unreachable("Unsupported Modifier");
      case MCSymbolRefExpr::VK_PPC_TLSGD:
-        Type = ELF::R_PPC64_TLSGD;
+        if (is64Bit())
+          Type = ELF::R_PPC64_TLSGD;
+        else
+          Type = ELF::R_PPC_TLSGD;
        break;
      case MCSymbolRefExpr::VK_PPC_TLSLD:
-        Type = ELF::R_PPC64_TLSLD;
+        if (is64Bit())
+          Type = ELF::R_PPC64_TLSLD;
+        else
+          Type = ELF::R_PPC_TLSLD;
        break;
      case MCSymbolRefExpr::VK_PPC_TLS:
-        Type = ELF::R_PPC64_TLS;
+        if (is64Bit())
+          Type = ELF::R_PPC64_TLS;
+        else
+          Type = ELF::R_PPC_TLS;
        break;
      }
      break;
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@ -184,6 +184,23 @@ public:
    if ((Flags & ELF::EF_PPC64_ABI) == 0)
      MCA.setELFHeaderEFlags(Flags | 2);
  }
+  void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override {
+    // When encoding an assignment to set symbol A to symbol B, also copy
+    // the st_other bits encoding the local entry point offset.
+    if (Value->getKind() != MCExpr::SymbolRef)
+      return;
+    const MCSymbol &RhsSym =
+        static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
+    MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym);
+    MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol);
+    // The "other" values are stored in the last 6 bits of the second byte.
+    // The traditional defines for STO values assume the full byte and thus
+    // the shift to pack it.
+    unsigned Other = MCELF::getOther(SymbolData) << 2;
+    Other &= ~ELF::STO_PPC64_LOCAL_MASK;
+    Other |= (MCELF::getOther(Data) << 2) & ELF::STO_PPC64_LOCAL_MASK;
+    MCELF::setOther(SymbolData, Other >> 2);
+  }
 };

 class PPCTargetMachOStreamer : public PPCTargetStreamer {
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@ -96,7 +96,12 @@ namespace llvm {
    MO_TOC_LO    = 7 << 4,

    // Symbol for VK_PPC_TLS fixup attached to an ADD instruction
-    MO_TLS       = 8 << 4
+    MO_TLS       = 8 << 4,
+
+    // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr
+    // call sequences.
+    MO_TLSLD     = 9 << 4,
+    MO_TLSGD     = 10 << 4
  };
  } // end namespace PPCII
  
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@ -373,7 +373,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    const MachineOperand &MO = MI->getOperand(1);

    // Map symbol -> label of TOC entry
-    assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+    assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
    MCSymbol *MOSymbol = nullptr;
    if (MO.isGlobal())
      MOSymbol = getSymbol(MO.getGlobal());
@ -381,6 +381,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
      MOSymbol = GetCPISymbol(MO.getIndex());
    else if (MO.isJTI())
      MOSymbol = GetJTISymbol(MO.getIndex());
+    else if (MO.isBlockAddress())
+      MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());

    MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);

@ -397,6 +399,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  }
  case PPC::LDtocJTI:
  case PPC::LDtocCPT:
+  case PPC::LDtocBA:
  case PPC::LDtoc: {
    // Transform %X3 = LDtoc <ga:@min1>, %X2
    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
@ -407,7 +410,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    const MachineOperand &MO = MI->getOperand(1);

    // Map symbol -> label of TOC entry
-    assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+    assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
    MCSymbol *MOSymbol = nullptr;
    if (MO.isGlobal())
      MOSymbol = getSymbol(MO.getGlobal());
@ -415,6 +418,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
      MOSymbol = GetCPISymbol(MO.getIndex());
    else if (MO.isJTI())
      MOSymbol = GetJTISymbol(MO.getIndex());
+    else if (MO.isBlockAddress())
+      MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());

    MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);

@ -436,7 +441,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    // reference the symbol directly.
    TmpInst.setOpcode(PPC::ADDIS8);
    const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+            MO.isBlockAddress()) &&
           "Invalid operand for ADDIStocHA!");
    MCSymbol *MOSymbol = nullptr;
    bool IsExternal = false;
@ -456,9 +462,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
      MOSymbol = GetCPISymbol(MO.getIndex());
    else if (MO.isJTI())
      MOSymbol = GetJTISymbol(MO.getIndex());
+    else if (MO.isBlockAddress())
+      MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());

    if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt ||
-        MO.isJTI() || TM.getCodeModel() == CodeModel::Large)
+        MO.isJTI() || MO.isBlockAddress() ||
+        TM.getCodeModel() == CodeModel::Large)
      MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);

    const MCExpr *Exp =
@ -477,12 +486,17 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    // associated TOC entry.  Otherwise reference the symbol directly.
    TmpInst.setOpcode(PPC::LD);
    const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+            MO.isBlockAddress()) &&
           "Invalid operand for LDtocL!");
    MCSymbol *MOSymbol = nullptr;

    if (MO.isJTI())
      MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
+    else if (MO.isBlockAddress()) {
+      MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+      MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+    }
    else if (MO.isCPI()) {
      MOSymbol = GetCPISymbol(MO.getIndex());
      if (TM.getCodeModel() == CodeModel::Large)
@ -573,6 +587,34 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    return;
  }

+  case PPC::PPC32PICGOT: {
+    MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+    MCSymbol *GOTRef = OutContext.CreateTempSymbol();
+    MCSymbol *NextInstr = OutContext.CreateTempSymbol();
+
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL)
+      // FIXME: We would like an efficient form for this, so we don't have to do
+      // a lot of extra uniquing.
+      .addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext)));
+    const MCExpr *OffsExpr =
+      MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext),
+                                MCSymbolRefExpr::Create(GOTRef, OutContext),
+        OutContext);
+    OutStreamer.EmitLabel(GOTRef);
+    OutStreamer.EmitValue(OffsExpr, 4);
+    OutStreamer.EmitLabel(NextInstr);
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR)
+                                .addReg(MI->getOperand(0).getReg()));
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LWZ)
+                                .addReg(MI->getOperand(1).getReg())
+                                .addImm(0)
+                                .addReg(MI->getOperand(0).getReg()));
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADD4)
+                                .addReg(MI->getOperand(0).getReg())
+                                .addReg(MI->getOperand(1).getReg())
+                                .addReg(MI->getOperand(0).getReg()));
+    return;
+  }
  case PPC::PPC32GOT: {
    MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
    const MCExpr *SymGotTlsL =
@ -606,40 +648,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
                                .addExpr(SymGotTlsGD));
    return;
  }
-  case PPC::ADDItlsgdL: {
+  case PPC::ADDItlsgdL:
    // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
    // Into:      %Xd = ADDI8 %Xs, sym@got@tlsgd@l
-    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+  case PPC::ADDItlsgdL32: {
+    // Transform: %Rd = ADDItlsgdL32 %Rs, <ga:@sym>
+    // Into:      %Rd = ADDI %Rs, sym@got@tlsgd
    const MachineOperand &MO = MI->getOperand(2);
    const GlobalValue *GValue = MO.getGlobal();
    MCSymbol *MOSymbol = getSymbol(GValue);
    const MCExpr *SymGotTlsGD =
-      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO,
+      MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
+                                         MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO :
+                                         MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
                              OutContext);
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
-                                .addReg(MI->getOperand(0).getReg())
-                                .addReg(MI->getOperand(1).getReg())
-                                .addExpr(SymGotTlsGD));
-    return;
-  }
-  case PPC::GETtlsADDR: {
-    // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
-    // Into:      BL8_NOP_TLS __tls_get_addr(sym@tlsgd)
-    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
-
-    StringRef Name = "__tls_get_addr";
-    MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
-    const MCSymbolRefExpr *TlsRef = 
-      MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
-    const MachineOperand &MO = MI->getOperand(2);
-    const GlobalValue *GValue = MO.getGlobal();
-    MCSymbol *MOSymbol = getSymbol(GValue);
-    const MCExpr *SymVar =
-      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
-                              OutContext);
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
-                                .addExpr(TlsRef)
-                                .addExpr(SymVar));
+    EmitToStreamer(OutStreamer,
+                   MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+                   .addReg(MI->getOperand(0).getReg())
+                   .addReg(MI->getOperand(1).getReg())
+                   .addExpr(SymGotTlsGD));
    return;
  }
  case PPC::ADDIStlsldHA: {
@ -658,72 +685,63 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
                                .addExpr(SymGotTlsLD));
    return;
  }
-  case PPC::ADDItlsldL: {
+  case PPC::ADDItlsldL:
    // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
    // Into:      %Xd = ADDI8 %Xs, sym@got@tlsld@l
-    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+  case PPC::ADDItlsldL32: {
+    // Transform: %Rd = ADDItlsldL32 %Rs, <ga:@sym>
+    // Into:      %Rd = ADDI %Rs, sym@got@tlsld
    const MachineOperand &MO = MI->getOperand(2);
    const GlobalValue *GValue = MO.getGlobal();
    MCSymbol *MOSymbol = getSymbol(GValue);
    const MCExpr *SymGotTlsLD =
-      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO,
+      MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
+                                         MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO :
+                                         MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
                              OutContext);
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
-                                .addReg(MI->getOperand(0).getReg())
-                                .addReg(MI->getOperand(1).getReg())
-                                .addExpr(SymGotTlsLD));
+    EmitToStreamer(OutStreamer,
+                   MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+                   .addReg(MI->getOperand(0).getReg())
+                   .addReg(MI->getOperand(1).getReg())
+                   .addExpr(SymGotTlsLD));
    return;
  }
-  case PPC::GETtlsldADDR: {
-    // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
-    // Into:      BL8_NOP_TLS __tls_get_addr(sym@tlsld)
-    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
-
-    StringRef Name = "__tls_get_addr";
-    MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
-    const MCSymbolRefExpr *TlsRef = 
-      MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
-    const MachineOperand &MO = MI->getOperand(2);
-    const GlobalValue *GValue = MO.getGlobal();
-    MCSymbol *MOSymbol = getSymbol(GValue);
-    const MCExpr *SymVar =
-      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
-                              OutContext);
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
-                                .addExpr(TlsRef)
-                                .addExpr(SymVar));
-    return;
-  }
-  case PPC::ADDISdtprelHA: {
+  case PPC::ADDISdtprelHA:
    // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
    // Into:      %Xd = ADDIS8 %X3, sym@dtprel@ha
-    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+  case PPC::ADDISdtprelHA32: {
+    // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym>
+    // Into:      %Rd = ADDIS %R3, sym@dtprel@ha
    const MachineOperand &MO = MI->getOperand(2);
    const GlobalValue *GValue = MO.getGlobal();
    MCSymbol *MOSymbol = getSymbol(GValue);
    const MCExpr *SymDtprel =
      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
                              OutContext);
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
-                                .addReg(MI->getOperand(0).getReg())
-                                .addReg(PPC::X3)
-                                .addExpr(SymDtprel));
+    EmitToStreamer(OutStreamer,
+                   MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
+                   .addReg(MI->getOperand(0).getReg())
+                   .addReg(Subtarget.isPPC64() ? PPC::X3 : PPC::R3)
+                   .addExpr(SymDtprel));
    return;
  }
-  case PPC::ADDIdtprelL: {
+  case PPC::ADDIdtprelL:
    // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
    // Into:      %Xd = ADDI8 %Xs, sym@dtprel@l
-    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+  case PPC::ADDIdtprelL32: {
+    // Transform: %Rd = ADDIdtprelL32 %Rs, <ga:@sym>
+    // Into:      %Rd = ADDI %Rs, sym@dtprel@l
    const MachineOperand &MO = MI->getOperand(2);
    const GlobalValue *GValue = MO.getGlobal();
    MCSymbol *MOSymbol = getSymbol(GValue);
    const MCExpr *SymDtprel =
      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
                              OutContext);
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
-                                .addReg(MI->getOperand(0).getReg())
-                                .addReg(MI->getOperand(1).getReg())
-                                .addExpr(SymDtprel));
+    EmitToStreamer(OutStreamer,
+                   MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+                   .addReg(MI->getOperand(0).getReg())
+                   .addReg(MI->getOperand(1).getReg())
+                   .addExpr(SymDtprel));
    return;
  }
  case PPC::MFOCRF:
@ -903,7 +921,7 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
    for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
         E = TOC.end(); I != E; ++I) {
      OutStreamer.EmitLabel(I->second);
-      MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
+      MCSymbol *S = I->first;
      if (isPPC64)
        TS.emitTCEntry(*S);
      else
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@ -153,7 +153,7 @@ class PPCFastISel final : public FastISel {
                           unsigned DestReg, bool IsZExt);
    unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
    unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
-    unsigned PPCMaterializeInt(const Constant *C, MVT VT);
+    unsigned PPCMaterializeInt(const Constant *C, MVT VT, bool UseSExt = true);
    unsigned PPCMaterialize32BitInt(int64_t Imm,
                                    const TargetRegisterClass *RC);
    unsigned PPCMaterialize64BitInt(int64_t Imm,
@ -865,7 +865,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
 }

 // Move an i32 or i64 value in a GPR to an f64 value in an FPR.
-// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// FIXME: When direct register moves are implemented (see PowerISA 2.07),
 // those should be used instead of moving via a stack slot when the
 // subtarget permits.
 // FIXME: The code here is sloppy for the 4-byte case.  Can use a 4-byte
@ -898,10 +898,10 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      LoadOpc = PPC::LFIWZX;
-      Addr.Offset = 4;
+      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    } else if (PPCSubTarget->hasLFIWAX()) {
      LoadOpc = PPC::LFIWAX;
-      Addr.Offset = 4;
+      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    }
  }

@ -985,7 +985,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {

 // Move the floating-point value in SrcReg into an integer destination
 // register, and return the register (or zero if we can't handle it).
-// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// FIXME: When direct register moves are implemented (see PowerISA 2.07),
 // those should be used instead of moving via a stack slot when the
 // subtarget permits.
 unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
@ -1548,13 +1548,23 @@ bool PPCFastISel::SelectRet(const Instruction *I) {

    // Special case for returning a constant integer of any size.
    // Materialize the constant as an i64 and copy it to the return
-    // register.  This avoids an unnecessary extend or truncate.
+    // register. We still need to worry about properly extending the sign. E.g:
+    // If the constant has only one bit, it means it is a boolean. Therefore
+    // we can't use PPCMaterializeInt because it extends the sign which will
+    // cause negations of the returned value to be incorrect as they are
+    // implemented as the flip of the least significant bit.
    if (isa<ConstantInt>(*RV)) {
      const Constant *C = cast<Constant>(RV);
-      unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
-      unsigned RetReg = ValLocs[0].getLocReg();
+
+      CCValAssign &VA = ValLocs[0];
+
+      unsigned RetReg = VA.getLocReg();
+      unsigned SrcReg = PPCMaterializeInt(C, MVT::i64,
+                                          VA.getLocInfo() == CCValAssign::SExt);
+
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
+            TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
+
      RetRegs.push_back(RetReg);

    } else {
@ -2014,7 +2024,8 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,

 // Materialize an integer constant into a register, and return
 // the register number (or zero if we failed to handle it).
-unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
+unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT,
+                                                           bool UseSExt) {
  // If we're using CR bit registers for i1 values, handle that as a special
  // case first.
  if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
@ -2038,7 +2049,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
    unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
    unsigned ImmReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
-      .addImm(CI->getSExtValue());
+      .addImm( (UseSExt) ? CI->getSExtValue() : CI->getZExtValue() );
    return ImmReg;
  }

--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@ -505,7 +505,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  DebugLoc dl;
-  bool needsFrameMoves = MMI.hasDebugInfo() ||
+  bool needsCFI = MMI.hasDebugInfo() ||
    MF.getFunction()->needsUnwindTableEntry();
  bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;

@ -726,17 +726,28 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
      .addReg(ScratchReg);
  }

-  // Add the "machine moves" for the instructions we generated above, but in
-  // reverse order.
-  if (needsFrameMoves) {
-    // Show update of SP.
-    assert(NegFrameSize);
-    unsigned CFIIndex = MMI.addFrameInst(
-        MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
+  // Add Call Frame Information for the instructions we generated above.
+  if (needsCFI) {
+    unsigned CFIIndex;
+
+    if (HasBP) {
+      // Define CFA in terms of BP. Do this in preference to using FP/SP,
+      // because if the stack needed aligning then CFA won't be at a fixed
+      // offset from FP/SP.
+      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
+      CFIIndex = MMI.addFrameInst(
+          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+    } else {
+      // Adjust the definition of CFA to account for the change in SP.
+      assert(NegFrameSize);
+      CFIIndex = MMI.addFrameInst(
+          MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
+    }
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    if (HasFP) {
+      // Describe where FP was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
@ -745,6 +756,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
    }

    if (HasBP) {
+      // Describe where BP was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
@ -753,6 +765,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
    }

    if (MustSaveLR) {
+      // Describe where LR was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
@ -767,8 +780,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
      .addReg(SPReg)
      .addReg(SPReg);

-    if (needsFrameMoves) {
-      // Mark effective beginning of when frame pointer is ready.
+    if (!HasBP && needsCFI) {
+      // Change the definition of CFA from SP+offset to FP+offset, because SP
+      // will change at every alloca.
      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
@ -778,8 +792,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
    }
  }

-  if (needsFrameMoves) {
-    // Add callee saved registers to move list.
+  if (needsCFI) {
+    // Describe where callee saved registers were saved, at fixed offsets from
+    // CFA.
    const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
      unsigned Reg = CSI[I].getReg();
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@ -172,10 +172,20 @@ namespace {
    /// a register.  The case of adding a (possibly relocatable) constant to a
    /// register can be improved, but it is wrong to substitute Reg+Reg for
    /// Reg in an asm, because the load or store opcode would have to change.
-   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      char ConstraintCode,
                                      std::vector<SDValue> &OutOps) override {
-      OutOps.push_back(Op);
+      // We need to make sure that this one operand does not end up in r0
+      // (because we might end up lowering this as 0(%op)).
+      const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+      const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
+      SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+      SDValue NewOp =
+        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+                                       SDLoc(Op), Op.getValueType(),
+                                       Op, RC), 0);
+
+      OutOps.push_back(NewOp);
      return false;
    }

@ -1439,7 +1449,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {

    // For medium and large code model, we generate two instructions as
    // described below.  Otherwise we allow SelectCodeCommon to handle this,
-    // selecting one of LDtoc, LDtocJTI, and LDtocCPT.
+    // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
    CodeModel::Model CModel = TM.getCodeModel();
    if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
      break;
@ -1456,7 +1466,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
    SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
                                        TOCbase, GA);

-    if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large)
+    if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
+        CModel == CodeModel::Large)
      return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
                                    SDValue(Tmp, 0));

@ -1473,6 +1484,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
    return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
                                  SDValue(Tmp, 0), GA);
  }
+  case PPCISD::PPC32_PICGOT: {
+    // Generate a PIC-safe GOT reference.
+    assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
+      "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
+    return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(),  MVT::i32);
+  }
  case PPCISD::VADD_SPLAT: {
    // This expands into one of three sequences, depending on whether
    // the first operand is odd or even, positive or negative.
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@ -781,6 +781,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
+  case PPCISD::CALL_TLS:        return "PPCISD::CALL_TLS";
+  case PPCISD::CALL_NOP_TLS:    return "PPCISD::CALL_NOP_TLS";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
@ -810,10 +812,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
-  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
-  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
@ -1631,8 +1631,16 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
+  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
+  const BlockAddress *BA = BASDN->getBlockAddress();

-  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+  // 64-bit SVR4 ABI code is always position-independent.
+  // The actual BlockAddress is stored in the TOC.
+  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
+    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
+                       DAG.getRegister(PPC::X2, MVT::i64));
+  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
@ -1641,6 +1649,27 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
  return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
 }

+// Generate a call to __tls_get_addr for the given GOT entry Op.
+std::pair<SDValue,SDValue>
+PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
+                                SelectionDAG &DAG) const {
+  // Op is the call's only argument; its type is the DataLayout's IntPtrType.
+  Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = Op;
+  Entry.Ty = IntPtrTy;
+  Args.push_back(Entry);
+  // C calling convention, IntPtrTy result, chained to the DAG entry node.
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+    .setCallee(CallingConv::C, IntPtrTy,
+               DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
+               std::move(Args), 0);
+  // Callers here use .first as the TLS address and .second as the chain.
+  return LowerCallTo(CLI);
+}
+
 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                              SelectionDAG &DAG) const {

@ -1684,50 +1713,40 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
  }

  if (Model == TLSModel::GeneralDynamic) {
-    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
-    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
-    SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
-                                     GOTReg, TGA);
+    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                             PPCII::MO_TLSGD);
+    SDValue GOTPtr;
+    if (is64bit) {
+      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+                                   GOTReg, TGA);
+    } else {
+      GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+    }
    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
-                                   GOTEntryHi, TGA);
-
-    // We need a chain node, and don't have one handy.  The underlying
-    // call has no side effects, so using the function entry node
-    // suffices.
-    SDValue Chain = DAG.getEntryNode();
-    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
-    SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
-    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
-                                  PtrVT, ParmReg, TGA);
-    // The return value from GET_TLS_ADDR really is in X3 already, but
-    // some hacks are needed here to tie everything together.  The extra
-    // copies dissolve during subsequent transforms.
-    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
-    return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
+                                   GOTPtr, TGA);
+    std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
+    return CallResult.first;
  }

  if (Model == TLSModel::LocalDynamic) {
-    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
-    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
-    SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
-                                     GOTReg, TGA);
+    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                             PPCII::MO_TLSLD);
+    SDValue GOTPtr;
+    if (is64bit) {
+      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+                           GOTReg, TGA);
+    } else {
+      GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+    }
    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
-                                   GOTEntryHi, TGA);
-
-    // We need a chain node, and don't have one handy.  The underlying
-    // call has no side effects, so using the function entry node
-    // suffices.
-    SDValue Chain = DAG.getEntryNode();
-    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
-    SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
-    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
-                                  PtrVT, ParmReg, TGA);
-    // The return value from GET_TLSLD_ADDR really is in X3 already, but
-    // some hacks are needed here to tie everything together.  The extra
-    // copies dissolve during subsequent transforms.
-    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+                                   GOTPtr, TGA);
+    std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
+    SDValue TLSAddr = CallResult.first;
+    SDValue Chain = CallResult.second;
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
-                                      Chain, ParmReg, TGA);
+                                      Chain, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

@ -2676,7 +2695,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
-        FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
+        FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
      else
        FI = MFI->CreateStackObject(ArgSize, Align, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@ -3042,7 +3061,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
-      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
@ -3690,6 +3709,23 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
+
+    // If this is a call to __tls_get_addr, find the symbol whose address
+    // is to be taken and add it to the list.  This will be used to
+    // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
+    // We find the symbol by walking the chain to the CopyFromReg, walking
+    // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
+    // pulling the symbol from that node.
+    if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+      if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
+        assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
+        SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
+        SDValue TGTAddr = AddI->getOperand(1);
+        assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
+               "Didn't find target global TLS address where we expected one");
+        Ops.push_back(TGTAddr);
+        CallOpc = PPCISD::CALL_TLS;
+      }
  }
  // If this is a tail call add stack pointer delta.
  if (isTailCall)
@ -3841,7 +3877,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
                DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
      // Otherwise insert NOP for non-local calls.
      CallOpc = PPCISD::CALL_NOP;
-    }
+    } else if (CallOpc == PPCISD::CALL_TLS)
+      // For 64-bit SVR4, TLS calls are always non-local.
+      CallOpc = PPCISD::CALL_NOP_TLS;
  }

  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
@ -8936,6 +8974,12 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                          &PPC::G8RCRegClass);
  }

+  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
+  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
+    R.first = PPC::CR0;
+    R.second = &PPC::CRRCRegClass;
+  }
+
  return R;
 }

@ -8964,37 +9008,42 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
-    unsigned Value = CST->getZExtValue();
+    int64_t Value = CST->getSExtValue();
+    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
+                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
-      if ((short)Value == (int)Value)
-        Result = DAG.getTargetConstant(Value, Op.getValueType());
+      if (isInt<16>(Value))
+        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
+      if (isShiftedUInt<16, 16>(Value))
+        Result = DAG.getTargetConstant(Value, TCVT);
+      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
-      if ((short)Value == 0)
-        Result = DAG.getTargetConstant(Value, Op.getValueType());
+      if (isShiftedInt<16, 16>(Value))
+        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
-      if ((Value >> 16) == 0)
-        Result = DAG.getTargetConstant(Value, Op.getValueType());
+      if (isUInt<16>(Value))
+        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
-        Result = DAG.getTargetConstant(Value, Op.getValueType());
+        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
-      if ((int)Value > 0 && isPowerOf2_32(Value))
-        Result = DAG.getTargetConstant(Value, Op.getValueType());
+      if (Value > 0 && isPowerOf2_64(Value))
+        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
-        Result = DAG.getTargetConstant(Value, Op.getValueType());
+        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
-      if ((short)-Value == (int)-Value)
-        Result = DAG.getTargetConstant(Value, Op.getValueType());
+      if (isInt<16>(-Value))
+        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    }
    break;
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@ -99,6 +99,10 @@ namespace llvm {
      /// SVR4 calls.
      CALL, CALL_NOP,

+      /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used
+      /// to access TLS variables.
+      CALL_TLS, CALL_NOP_TLS,
+
      /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
      /// MTCTR instruction.
      MTCTR,
@ -181,6 +185,10 @@ namespace llvm {
      /// on PPC32.
      PPC32_GOT,

+      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
+      /// local dynamic TLS on PPC32.
+      PPC32_PICGOT,
+
      /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
      /// TLS model, produces an ADDIS8 instruction that adds the GOT
      /// base to sym\@got\@tprel\@ha.
@ -210,10 +218,6 @@ namespace llvm {
      /// sym\@got\@tlsgd\@l.
      ADDI_TLSGD_L,

-      /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
-      /// model, produces a call to __tls_get_addr(sym\@tlsgd).
-      GET_TLS_ADDR,
-
      /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
      /// model, produces an ADDIS8 instruction that adds the GOT base
      /// register to sym\@got\@tlsld\@ha.
@ -224,10 +228,6 @@ namespace llvm {
      /// sym\@got\@tlsld\@l.
      ADDI_TLSLD_L,

-      /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
-      /// model, produces a call to __tls_get_addr(sym\@tlsld).
-      GET_TLSLD_ADDR,
-
      /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
      /// local-dynamic TLS model, produces an ADDIS8 instruction
      /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
@ -548,6 +548,8 @@ namespace llvm {
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+    std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl,
+                                            SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@ -188,6 +188,9 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
 def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
          (BL8_NOP texternalsym:$dst)>;

+def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym),
+          (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
+
 // Atomic operations
 let usesCustomInserter = 1 in {
  let Defs = [CR0] in {
@ -786,7 +789,7 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in {
 def LD   : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
                    "ld $rD, $src", IIC_LdStLD,
                    [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
-// The following three definitions are selected for small code model only.
+// The following four definitions are selected for small code model only.
 // Otherwise, we need to create two instructions to form a 32-bit offset,
 // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
 def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
@ -801,6 +804,10 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
                  "#LDtocCPT",
                  [(set i64:$rD,
                     (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
+def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+                  "#LDtocBA",
+                  [(set i64:$rD,
+                     (PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;

 let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2, Defs = [X2] in
 def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src),
@ -872,11 +879,6 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                       [(set i64:$rD,
                         (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
                 isPPC64;
-def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
-                        "#GETtlsADDR",
-                        [(set i64:$rD,
-                          (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
-                 isPPC64;
 def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                         "#ADDIStlsldHA",
                         [(set i64:$rD,
@ -887,11 +889,6 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                       [(set i64:$rD,
                         (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
                 isPPC64;
-def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
-                          "#GETtlsldADDR",
-                          [(set i64:$rD,
-                            (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
-                   isPPC64;
 def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                          "#ADDISdtprelHA",
                          [(set i64:$rD,
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@ -110,10 +110,8 @@ def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
 def PPCaddTls     : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
 def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
 def PPCaddiTlsgdL   : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
-def PPCgetTlsAddr   : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
 def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
 def PPCaddiTlsldL   : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
-def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
 def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
                              [SDNPHasChain]>;
 def PPCaddiDtprelL   : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
@ -136,9 +134,15 @@ def SDT_PPCCall   : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
 def PPCcall  : SDNode<"PPCISD::CALL", SDT_PPCCall,
                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                       SDNPVariadic]>;
+def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall,
+                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                          SDNPVariadic]>;
 def PPCcall_nop  : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
                          [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                           SDNPVariadic]>;
+def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall,
+                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                              SDNPVariadic]>;
 def PPCload   : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
                       [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
@ -588,6 +592,12 @@ def tlsreg32 : Operand<i32> {
  let EncoderMethod = "getTLSRegEncoding";
  let ParserMatchClass = PPCTLSRegOperand;
 }
+def tlsgd32 : Operand<i32> {}
+def tlscall32 : Operand<i32> {
+  let PrintMethod = "printTLSCall";
+  let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym);
+  let EncoderMethod = "getTLSCallEncoding";
+}

 // PowerPC Predicate operand.
 def pred : Operand<OtherVT> {
@ -1071,6 +1081,8 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
                    "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>;

    let isCodeGenOnly = 1 in {
+      def BL_TLS  : IForm<18, 0, 1, (outs), (ins tlscall32:$func),
+                          "bl $func", IIC_BrB, []>;
      def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst),
                       "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">;
      def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst),
@ -2358,6 +2370,8 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
 def : Pat<(PPCcall (i32 texternalsym:$dst)),
          (BL texternalsym:$dst)>;

+def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym),
+          (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;

 def : Pat<(PPCtc_return (i32 tglobaladdr:$dst),  imm:$imm),
          (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@ -2396,13 +2410,37 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
 def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT", 
                [(set i32:$rD, (PPCppc32GOT))]>;

+// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode.
+// This uses two output registers, the first as the real output, the second as a
+// temporary register, used internally in code generation.
+def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT", 
+                []>, NoEncode<"$rT">;
+
 def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
-                        "#LDgotTprelL32",
-                        [(set i32:$rD,
-                          (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
+                           "#LDgotTprelL32",
+                           [(set i32:$rD,
+                             (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
 def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
          (ADD4TLS $in, tglobaltlsaddr:$g)>;

+def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+                         "#ADDItlsgdL32",
+                         [(set i32:$rD,
+                           (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+                          "#ADDItlsldL32",
+                          [(set i32:$rD,
+                            (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+                           "#ADDIdtprelL32",
+                           [(set i32:$rD,
+                             (PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+                            "#ADDISdtprelHA32",
+                            [(set i32:$rD,
+                              (PPCaddisDtprelHA i32:$reg,
+                                                tglobaltlsaddr:$disp))]>;
+
 // Support for Position-independent code
 def LWZtoc: Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
                  "#LWZtoc",
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@ -137,6 +137,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
    case PPCII::MO_TLS:
      RefKind = MCSymbolRefExpr::VK_PPC_TLS;
      break;
+    case PPCII::MO_TLSGD:
+      RefKind = MCSymbolRefExpr::VK_PPC_TLSGD;
+      break;
+    case PPCII::MO_TLSLD:
+      RefKind = MCSymbolRefExpr::VK_PPC_TLSLD;
+      break;
  }

  if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@ -188,13 +188,6 @@ def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
 def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
 }

-// The full condition-code register. This is not modeled fully, but defined
-// here primarily, for compatibility with gcc, to allow the inline asm "cc"
-// clobber specification to work.
-def CC : PPCReg<"cc">, DwarfRegAlias<CR0> {
-  let Aliases = [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7];
-}
-
 // Link register
 def LR  : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
 //let Aliases = [LR] in
@ -308,7 +301,3 @@ def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
  let CopyCost = -1;
 }

-def CCRC : RegisterClass<"PPC", [i32], 32, (add CC)> {
-  let isAllocatable = 0;
-}
-
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@ -2829,6 +2829,9 @@ bool X86FastISel::FastLowerCall(CallLoweringInfo &CLI) {
      // VExt has not been implemented, so this should be impossible to reach
      // for now.  However, fallback to Selection DAG isel once implemented.
      return false;
+    case CCValAssign::AExtUpper:
+    case CCValAssign::SExtUpper:
+    case CCValAssign::ZExtUpper:
    case CCValAssign::FPExt:
      llvm_unreachable("Unexpected loc info!");
    case CCValAssign::Indirect:
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@ -1623,15 +1623,30 @@ LinearFunctionTestReplace(Loop *L,
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (L->getExitingBlock() == L->getLoopLatch()) {
-    // Add one to the "backedge-taken" count to get the trip count.
-    // This addition may overflow, which is valid as long as the comparison is
-    // truncated to BackedgeTakenCount->getType().
-    IVCount = SE->getAddExpr(BackedgeTakenCount,
-                             SE->getConstant(BackedgeTakenCount->getType(), 1));
    // The BackedgeTaken expression contains the number of times that the
    // backedge branches to the loop header.  This is one less than the
    // number of times the loop executes, so use the incremented indvar.
-    CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+    llvm::Value *IncrementedIndvar =
+        IndVar->getIncomingValueForBlock(L->getExitingBlock());
+    const auto *IncrementedIndvarSCEV =
+        cast<SCEVAddRecExpr>(SE->getSCEV(IncrementedIndvar));
+    // It is unsafe to use the incremented indvar if it has a wrapping flag,
+    // because we don't want to compare against a poison value.  Check the
+    // SCEV that corresponds to the incremented indvar: the SCEVExpander will
+    // only insert flags in the IR if the SCEV originally had wrapping flags.
+    // FIXME: In theory, SCEV could drop flags even though they exist in IR.
+    // A more robust solution would involve getting a new expression for
+    // CmpIndVar by applying non-NSW/NUW AddExprs.
+    if (!ScalarEvolution::maskFlags(IncrementedIndvarSCEV->getNoWrapFlags(),
+                                    SCEV::FlagNUW | SCEV::FlagNSW)) {
+      // Add one to the "backedge-taken" count to get the trip count.
+      // This addition may overflow, which is valid as long as the comparison is
+      // truncated to BackedgeTakenCount->getType().
+      IVCount =
+          SE->getAddExpr(BackedgeTakenCount,
+                         SE->getConstant(BackedgeTakenCount->getType(), 1));
+      CmpIndVar = IncrementedIndvar;
+    }
  }

  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@ -27,18 +27,18 @@
 //
 //            header:
 //                     br %cond, label %if.then, label %if.else
-//                        /                    \
-//                       /                      \
-//                      /                        \
+//                        +                    +
+//                       +                      +
+//                      +                        +
 //            if.then:                         if.else:
 //               %lt = load %addr_l               %le = load %addr_l
 //               <use %lt>                        <use %le>
 //               <...>                            <...>
 //               store %st, %addr_s               store %se, %addr_s
 //               br label %if.end                 br label %if.end
-//                     \                         /
-//                      \                       /
-//                       \                     /
+//                     +                         +
+//                      +                       +
+//                       +                     +
 //            if.end ("footer"):
 //                     <...>
 //
@ -47,16 +47,16 @@
 //            header:
 //                     %l = load %addr_l
 //                     br %cond, label %if.then, label %if.else
-//                        /                    \
-//                       /                      \
-//                      /                        \
+//                        +                    +
+//                       +                      +
+//                      +                        +
 //            if.then:                         if.else:
 //               <use %l>                         <use %l>
 //               <...>                            <...>
 //               br label %if.end                 br label %if.end
-//                      \                        /
-//                       \                      /
-//                        \                    /
+//                      +                        +
+//                       +                      +
+//                        +                    +
 //            if.end ("footer"):
 //                     %s.sink = phi [%st, if.then], [%se, if.else]
 //                     <...>
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@ -2697,7 +2697,10 @@ private:
    // the old pointer, which necessarily must be in the right position to
    // dominate the PHI.
    IRBuilderTy PtrBuilder(IRB);
-    PtrBuilder.SetInsertPoint(OldPtr);
+    if (isa<PHINode>(OldPtr))
+      PtrBuilder.SetInsertPoint(OldPtr->getParent()->getFirstInsertionPt());
+    else
+      PtrBuilder.SetInsertPoint(OldPtr);
    PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());

    Value *NewPtr = getNewAllocaSlicePtr(PtrBuilder, OldPtr->getType());
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@ -3357,7 +3357,7 @@ void InnerLoopVectorizer::updateAnalysis() {
  DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks[1]);
  DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]);
  DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
-  DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
+  DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]);

  DEBUG(DT->verifyDomTree());
 }
@ -3466,6 +3466,15 @@ bool LoopVectorizationLegality::canVectorize() {
    return false;
  }

+  // We only handle bottom-tested loops, i.e. loops in which the condition is
+  // checked at the end of each iteration. With that we can assume that all
+  // instructions in the loop are executed the same number of times.
+  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
+    emitAnalysis(
+        Report() << "loop control flow is not understood by vectorizer");
+    return false;
+  }
+
  // We need to have a loop header.
  DEBUG(dbgs() << "LV: Found a loop: " <<
        TheLoop->getHeader()->getName() << '\n');
@ -5192,7 +5201,13 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
    return IK_NoInduction;

  assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
-  uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
+  Type *PointerElementType = PhiTy->getPointerElementType();
+  // The pointer stride cannot be determined if the pointer element type is not
+  // sized.
+  if (!PointerElementType->isSized())
+    return IK_NoInduction;
+
+  uint64_t Size = DL->getTypeAllocSize(PointerElementType);
  if (C->getValue()->equalsInt(Size))
    return IK_PtrInduction;
  else if (C->getValue()->equalsInt(0 - Size))
--- a/test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll
+++ b/test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll
@ -0,0 +1,40 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+; PR21622: Check for a crasher when the sum of exits to the same successor of a
+; loop overflows.
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'extremely_likely_loop_successor':
+; CHECK-NEXT: block-frequency-info: extremely_likely_loop_successor
+define void @extremely_likely_loop_successor() {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+  br label %loop
+
+; CHECK-NEXT: loop: float = 1.0,
+loop:
+  %exit.1.cond = call i1 @foo()
+  br i1 %exit.1.cond, label %exit, label %loop.2, !prof !0
+
+; CHECK-NEXT: loop.2: float = 0.0000000
+loop.2:
+  %exit.2.cond = call i1 @foo()
+  br i1 %exit.2.cond, label %exit, label %loop.3, !prof !0
+
+; CHECK-NEXT: loop.3: float = 0.0000000
+loop.3:
+  %exit.3.cond = call i1 @foo()
+  br i1 %exit.3.cond, label %exit, label %loop.4, !prof !0
+
+; CHECK-NEXT: loop.4: float = 0.0,
+loop.4:
+  %exit.4.cond = call i1 @foo()
+  br i1 %exit.4.cond, label %exit, label %loop, !prof !0
+
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+exit:
+  ret void
+}
+
+declare i1 @foo()
+
+!0 = metadata !{metadata !"branch_weights", i32 4294967295, i32 1}
--- a/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
+++ b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
@ -0,0 +1,21 @@
+; RUN: llc -mtriple=thumbv7-eabi -o - %s | FileCheck %s
+
+declare cc 10 void @g()
+
+define cc 10 void @test_direct_tail() {
+; CHECK-LABEL: test_direct_tail:
+; CHECK: b g
+
+  tail call cc10 void @g()
+  ret void
+}
+
+@ind_func = global void()* zeroinitializer
+
+define cc 10 void @test_indirect_tail() {
+; CHECK-LABEL: test_indirect_tail:
+; CHECK: bx {{r[0-9]+}}
+  %func = load void()** @ind_func
+  tail call cc10 void()* %func()
+  ret void
+}
--- a/test/CodeGen/Mips/abicalls.ll
+++ b/test/CodeGen/Mips/abicalls.ll
@ -1,16 +1,11 @@
-; 
-; When the assembler is ready a .s file for it will
-; be created.
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=STATIC %s
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=PIC %s
+; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips4 -relocation-model=static %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=PIC %s
+; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=PIC %s

-; Note that EF_MIPS_CPIC is set by -mabicalls which is the default on Linux
-; TODO need to support -mno-abicalls
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -mattr noabicalls -relocation-model=static %s -o - | FileCheck -implicit-check-not='.abicalls' -implicit-check-not='pic0' %s

-; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-STATIC %s
-; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck -check-prefix=CHECK-PIC %s
-; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips4 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-PIC %s
-; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-PIC %s
+; ABICALLS: .abicalls

-; CHECK-STATIC: .abicalls
-; CHECK-STATIC-NEXT: pic0
-; CHECK-PIC: .abicalls
-; CHECK-PIC-NOT: pic0
+; STATIC: pic0
+; PIC-NOT: pic0
--- a/test/CodeGen/Mips/atomic.ll
+++ b/test/CodeGen/Mips/atomic.ll
@ -12,7 +12,7 @@

@x = common global i32 0, align 4

-define i32 @AtomicLoadAdd32(i32 %incr) nounwind {
+define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind {
 entry:
  %0 = atomicrmw add i32* @x, i32 %incr monotonic
  ret i32 %0
@ -29,7 +29,7 @@ entry:
 ; ALL:           beqz    $[[R2]], $[[BB0]]
 }

-define i32 @AtomicLoadNand32(i32 %incr) nounwind {
+define i32 @AtomicLoadNand32(i32 signext %incr) nounwind {
 entry:
  %0 = atomicrmw nand i32* @x, i32 %incr monotonic
  ret i32 %0
@ -47,7 +47,7 @@ entry:
 ; ALL:           beqz    $[[R2]], $[[BB0]]
 }

-define i32 @AtomicSwap32(i32 %newval) nounwind {
+define i32 @AtomicSwap32(i32 signext %newval) nounwind {
 entry:
  %newval.addr = alloca i32, align 4
  store i32 %newval, i32* %newval.addr, align 4
@ -66,7 +66,7 @@ entry:
 ; ALL:           beqz    $[[R2]], $[[BB0]]
 }

-define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind {
+define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind {
 entry:
  %newval.addr = alloca i32, align 4
  store i32 %newval, i32* %newval.addr, align 4
@ -293,7 +293,7 @@ entry:
 ; HAS-SEB-SEH:   seb     $2, $[[R17]]
 }

-define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 %oldval, i8 signext %newval) nounwind {
+define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) nounwind {
 entry:
  %0 = cmpxchg i8* %ptr, i8 %oldval, i8 %newval monotonic monotonic
  %1 = extractvalue { i8, i1 } %0, 1
@ -381,7 +381,7 @@ entry:

@countsint = common global i32 0, align 4

-define i32 @CheckSync(i32 %v) nounwind noinline {
+define i32 @CheckSync(i32 signext %v) nounwind noinline {
 entry:
  %0 = atomicrmw add i32* @countsint, i32 %v seq_cst
  ret i32 %0 
@ -415,7 +415,7 @@ entry:

 ; Check that MIPS32R6 has the correct offset range.
 ; FIXME: At the moment, we don't seem to do addr+offset for any atomic load/store.
-define i32 @AtomicLoadAdd32_OffGt9Bit(i32 %incr) nounwind {
+define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind {
 entry:
  %0 = atomicrmw add i32* getelementptr(i32* @x, i32 256), i32 %incr monotonic
  ret i32 %0
--- a/test/CodeGen/Mips/bswap.ll
+++ b/test/CodeGen/Mips/bswap.ll
@ -2,7 +2,7 @@
 ; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64
 ; RUN: llc  < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 | FileCheck %s -check-prefix=MIPS16

-define i32 @bswap32(i32 %x) nounwind readnone {
+define i32 @bswap32(i32 signext %x) nounwind readnone {
 entry:
 ; MIPS32-LABEL: bswap32:
 ; MIPS32: wsbh $[[R0:[0-9]+]]
@ -29,7 +29,7 @@ entry:
  ret i32 %or.3
 }

-define i64 @bswap64(i64 %x) nounwind readnone {
+define i64 @bswap64(i64 signext %x) nounwind readnone {
 entry:
 ; MIPS32-LABEL: bswap64:
 ; MIPS32: wsbh $[[R0:[0-9]+]]
@ -72,24 +72,24 @@ entry:
 define <4 x i32> @bswapv4i32(<4 x i32> %x) nounwind readnone {
 entry:
 ; MIPS32-LABEL: bswapv4i32:
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16

 ; MIPS64-LABEL: bswapv4i32:
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16

 ; Don't bother with a MIPS16 version. It's just bswap32 repeated four times and
 ; would be very long
--- a/test/CodeGen/Mips/cconv/arguments-float.ll
+++ b/test/CodeGen/Mips/cconv/arguments-float.ll
@ -69,26 +69,26 @@ entry:
 ; O32-DAG:           sw [[R4]], 28([[R2]])
 ; NEW-DAG:           sd $6, 24([[R2]])

-; O32-DAG:           lw [[R3:\$[0-9]+]], 32($sp)
-; O32-DAG:           lw [[R4:\$[0-9]+]], 36($sp)
+; O32-DAG:           lw [[R3:\$([0-9]+|gp)]], 32($sp)
+; O32-DAG:           lw [[R4:\$([0-9]+|gp)]], 36($sp)
 ; O32-DAG:           sw [[R3]], 32([[R2]])
 ; O32-DAG:           sw [[R4]], 36([[R2]])
 ; NEW-DAG:           sd $7, 32([[R2]])

-; O32-DAG:           lw [[R3:\$[0-9]+]], 40($sp)
-; O32-DAG:           lw [[R4:\$[0-9]+]], 44($sp)
+; O32-DAG:           lw [[R3:\$([0-9]+|gp)]], 40($sp)
+; O32-DAG:           lw [[R4:\$([0-9]+|gp)]], 44($sp)
 ; O32-DAG:           sw [[R3]], 40([[R2]])
 ; O32-DAG:           sw [[R4]], 44([[R2]])
 ; NEW-DAG:           sd $8, 40([[R2]])

-; O32-DAG:           lw [[R3:\$[0-9]+]], 48($sp)
-; O32-DAG:           lw [[R4:\$[0-9]+]], 52($sp)
+; O32-DAG:           lw [[R3:\$([0-9]+|gp)]], 48($sp)
+; O32-DAG:           lw [[R4:\$([0-9]+|gp)]], 52($sp)
 ; O32-DAG:           sw [[R3]], 48([[R2]])
 ; O32-DAG:           sw [[R4]], 52([[R2]])
 ; NEW-DAG:           sd $9, 48([[R2]])

-; O32-DAG:           lw [[R3:\$[0-9]+]], 56($sp)
-; O32-DAG:           lw [[R4:\$[0-9]+]], 60($sp)
+; O32-DAG:           lw [[R3:\$([0-9]+|gp)]], 56($sp)
+; O32-DAG:           lw [[R4:\$([0-9]+|gp)]], 60($sp)
 ; O32-DAG:           sw [[R3]], 56([[R2]])
 ; O32-DAG:           sw [[R4]], 60([[R2]])
 ; NEW-DAG:           sd $10, 56([[R2]])
@ -135,8 +135,8 @@ entry:
 ; SYM64-DAG:           ld [[R2:\$[0-9]]], %got_disp(floats)(

 ; The first four arguments are the same in O32/N32/N64.
-; The first argument isn't floating point so floating point registers are not
-; used.
+; The first argument is floating point but soft-float is enabled so floating
+; point registers are not used.
 ; MD00305 and GCC disagree on this one. MD00305 says that floats are treated
 ; as 8-byte aligned and occupy two slots on O32. GCC is treating them as 4-byte
 ; aligned and occupying one slot. We'll use GCC's definition.
@ -195,7 +195,7 @@ entry:
 ; O32-DAG:           sw $7, 12([[R2]])
 ; NEW-DAG:           sd $5, 8([[R2]])

-define void @float_arg2(i8 %a, float %b) nounwind {
+define void @float_arg2(i8 signext %a, float %b) nounwind {
 entry:
        %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
        store volatile i8 %a, i8* %0
--- a/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
+++ b/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
@ -4,11 +4,11 @@
 ; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
 ; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s

-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW %s
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWBE %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWLE %s

-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWBE %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWLE %s

 ; Test the effect of varargs on floating point types in the non-variable part
 ; of the argument list as specified by section 2 of the MIPSpro N32 Handbook.
@ -34,6 +34,7 @@ entry:
        %b = va_arg i8** %ap, double
        %1 = getelementptr [11 x double]* @doubles, i32 0, i32 2
        store volatile double %b, double* %1
+        call void @llvm.va_end(i8* %ap2)
        ret void
 }

@ -98,6 +99,7 @@ entry:
        %b = va_arg i8** %ap, float
        %1 = getelementptr [11 x float]* @floats, i32 0, i32 2
        store volatile float %b, float* %1
+        call void @llvm.va_end(i8* %ap2)
        ret void
 }

@ -140,16 +142,18 @@ entry:
 ; Increment the pointer then get the varargs arg
 ; LLVM will rebind the load to the stack pointer instead of the varargs pointer
 ; during lowering. This is fine and doesn't change the behaviour.
-; N32/N64 is using ori instead of addiu/daddiu but (although odd) this is fine
-; since the stack is always aligned.
+; Also, in big-endian mode the offset must be increased by 4 to retrieve the
+; correct half of the argument slot.
+;
 ; O32-DAG:           addiu [[VAPTR]], [[VAPTR]], 4
 ; O32-DAG:           sw [[VAPTR]], 4($sp)
-; N32-DAG:           ori [[VAPTR]], [[VAPTR]], 4
+; N32-DAG:           addiu [[VAPTR]], [[VAPTR]], 8
 ; N32-DAG:           sw [[VAPTR]], 4($sp)
-; N64-DAG:           ori [[VAPTR]], [[VAPTR]], 4
+; N64-DAG:           daddiu [[VAPTR]], [[VAPTR]], 8
 ; N64-DAG:           sd [[VAPTR]], 0($sp)
 ; O32-DAG:           lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
-; NEW-DAG:           lwc1 [[FTMP1:\$f[0-9]+]], 8($sp)
+; NEWLE-DAG:         lwc1 [[FTMP1:\$f[0-9]+]], 8($sp)
+; NEWBE-DAG:         lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
 ; ALL-DAG:           swc1 [[FTMP1]], 8([[R2]])

 declare void @llvm.va_start(i8*)
--- a/test/CodeGen/Mips/cconv/arguments-struct.ll
+++ b/test/CodeGen/Mips/cconv/arguments-struct.ll
@ -0,0 +1,41 @@
+; RUN: llc -mtriple=mips-unknown-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-BE %s
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-LE %s
+
+; RUN-TODO: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-BE %s
+; RUN-TODO: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-LE %s
+
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW-BE %s
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW-LE %s
+
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW-BE %s
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW-LE %s
+
+; Test small structures for all ABIs and byte orders.
+;
+; N32/N64 are identical in this area so their checks have been combined into
+; the 'NEW' prefix (the N stands for New).
+
+@bytes = global [2 x i8] zeroinitializer
+
+define void @s_i8(i8 inreg %a) nounwind {
+entry:
+	store i8 %a, i8* getelementptr inbounds ([2 x i8]* @bytes, i32 0, i32 1)
+        ret void
+}
+
+; ALL-LABEL: s_i8:
+
+; SYM32-DAG:   lui   [[PTR_HI:\$[0-9]+]], %hi(bytes)
+; SYM32-DAG:   addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(bytes)
+
+; SYM64-DAG:   ld    [[PTR:\$[0-9]+]], %got_disp(bytes)(
+
+; O32-BE-DAG:  srl [[ARG:\$[0-9]+]], $4, 24
+; O32-BE-DAG:  sb  [[ARG]], 1([[PTR]])
+
+; O32-LE-DAG:  sb  $4, 1([[PTR]])
+
+; NEW-BE-DAG:  dsrl [[ARG:\$[0-9]+]], $4, 56
+; NEW-BE-DAG:  sb   [[ARG]], 1([[PTR]])
+
+; NEW-LE-DAG:  sb   $4, 1([[PTR]])
--- a/test/CodeGen/Mips/cconv/arguments-varargs.ll
+++ b/test/CodeGen/Mips/cconv/arguments-varargs.ll
--- a/test/CodeGen/Mips/cconv/arguments.ll
+++ b/test/CodeGen/Mips/cconv/arguments.ll
@ -1,5 +1,5 @@
-; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
-; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
+; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s

 ; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
 ; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
@ -23,8 +23,10 @@
@floats = global [11 x float] zeroinitializer
@doubles = global [11 x double] zeroinitializer

-define void @align_to_arg_slots(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g,
-                                i8 %h, i8 %i, i8 %j) nounwind {
+define void @align_to_arg_slots(i8 signext %a, i8 signext %b, i8 signext %c,
+                                i8 signext %d, i8 signext %e, i8 signext %f,
+                                i8 signext %g, i8 signext %h, i8 signext %i,
+                                i8 signext %j) nounwind {
 entry:
        %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
        store volatile i8 %a, i8* %0
@ -53,7 +55,7 @@ entry:
 ; We won't test the way the global address is calculated in this test. This is
 ; just to get the register number for the other checks.
 ; SYM32-DAG:           addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
-; SYM64-DAG:           ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM64-DAG:           ld [[R1:\$[0-9]+]], %got_disp(bytes)(

 ; The first four arguments are the same in O32/N32/N64
 ; ALL-DAG:           sb $4, 1([[R1]])
@ -82,15 +84,16 @@ entry:
 ; increase by 4 for O32 and 8 for N32/N64.
 ; O32-DAG:           lw [[R3:\$[0-9]+]], 32($sp)
 ; O32-DAG:           sb [[R3]], 9([[R1]])
-; NEW-DAG:           lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG:           ld [[R3:\$[0-9]+]], 0($sp)
 ; NEW-DAG:           sb [[R3]], 9([[R1]])
 ; O32-DAG:           lw [[R3:\$[0-9]+]], 36($sp)
 ; O32-DAG:           sb [[R3]], 10([[R1]])
-; NEW-DAG:           lw [[R3:\$[0-9]+]], 8($sp)
+; NEW-DAG:           ld [[R3:\$[0-9]+]], 8($sp)
 ; NEW-DAG:           sb [[R3]], 10([[R1]])

-define void @slot_skipping(i8 %a, i64 %b, i8 %c, i8 %d,
-                           i8 %e, i8 %f, i8 %g, i64 %i, i8 %j) nounwind {
+define void @slot_skipping(i8 signext %a, i64 signext %b, i8 signext %c,
+                           i8 signext %d, i8 signext %e, i8 signext %f,
+                           i8 signext %g, i64 signext %i, i8 signext %j) nounwind {
 entry:
        %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
        store volatile i8 %a, i8* %0
@ -117,9 +120,9 @@ entry:
 ; We won't test the way the global address is calculated in this test. This is
 ; just to get the register number for the other checks.
 ; SYM32-DAG:           addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
-; SYM64-DAG:           ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM64-DAG:           ld [[R1:\$[0-9]+]], %got_disp(bytes)(
 ; SYM32-DAG:           addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
-; SYM64-DAG:           ld [[R2:\$[0-9]]], %got_disp(dwords)(
+; SYM64-DAG:           ld [[R2:\$[0-9]+]], %got_disp(dwords)(

 ; The first argument is the same in O32/N32/N64.
 ; ALL-DAG:           sb $4, 1([[R1]])
@ -137,8 +140,7 @@ entry:
 ; It's not clear why O32 uses lbu for this argument, but it's not wrong so we'll
 ; accept it for now. The only IR difference is that this argument has
 ; anyext from i8 and align 8 on it.
-; O32LE-DAG:           lbu [[R3:\$[0-9]+]], 16($sp)
-; O32BE-DAG:           lbu [[R3:\$[0-9]+]], 19($sp)
+; O32-DAG:           lw [[R3:\$[0-9]+]], 16($sp)
 ; O32-DAG:           sb [[R3]], 2([[R1]])
 ; NEW-DAG:           sb $6, 2([[R1]])
 ; O32-DAG:           lw [[R3:\$[0-9]+]], 20($sp)
@ -166,5 +168,5 @@ entry:
 ; increase by 4 for O32 and 8 for N32/N64.
 ; O32-DAG:           lw [[R3:\$[0-9]+]], 48($sp)
 ; O32-DAG:           sb [[R3]], 7([[R1]])
-; NEW-DAG:           lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG:           ld [[R3:\$[0-9]+]], 0($sp)
 ; NEW-DAG:           sb [[R3]], 7([[R1]])
--- a/test/CodeGen/Mips/cconv/return-float.ll
+++ b/test/CodeGen/Mips/cconv/return-float.ll
@ -30,7 +30,7 @@ entry:
 ; O32-DAG:           lw $2, %lo(float)([[R1]])
 ; N32-DAG:           lui [[R1:\$[0-9]+]], %hi(float)
 ; N32-DAG:           lw $2, %lo(float)([[R1]])
-; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(float)($1)
+; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(float)(
 ; N64-DAG:           lw $2, 0([[R1]])

 define double @retdouble() nounwind {
@ -44,5 +44,5 @@ entry:
 ; O32-DAG:           addiu [[R2:\$[0-9]+]], [[R1]], %lo(double)
 ; O32-DAG:           lw $3, 4([[R2]])
 ; N32-DAG:           ld $2, %lo(double)([[R1:\$[0-9]+]])
-; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(double)($1)
+; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(double)(
 ; N64-DAG:           ld $2, 0([[R1]])
--- a/test/CodeGen/Mips/cconv/return-hard-float.ll
+++ b/test/CodeGen/Mips/cconv/return-hard-float.ll
@ -33,7 +33,7 @@ entry:
 ; O32-DAG:           lwc1 $f0, %lo(float)([[R1]])
 ; N32-DAG:           lui [[R1:\$[0-9]+]], %hi(float)
 ; N32-DAG:           lwc1 $f0, %lo(float)([[R1]])
-; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(float)($1)
+; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(float)(
 ; N64-DAG:           lwc1 $f0, 0([[R1]])

 define double @retdouble() nounwind {
@ -45,7 +45,7 @@ entry:
 ; ALL-LABEL: retdouble:
 ; O32-DAG:           ldc1 $f0, %lo(double)([[R1:\$[0-9]+]])
 ; N32-DAG:           ldc1 $f0, %lo(double)([[R1:\$[0-9]+]])
-; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(double)($1)
+; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(double)(
 ; N64-DAG:           ldc1 $f0, 0([[R1]])

 define { double, double } @retComplexDouble() #0 {
--- a/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
+++ b/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
@ -0,0 +1,36 @@
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test that the return of {fp128} agrees with the de facto N32/N64 ABI.
+
+@struct_fp128 = global {fp128} zeroinitializer
+
+define inreg {fp128} @ret_struct_fp128() nounwind {
+entry:
+        %0 = load volatile {fp128}* @struct_fp128
+        ret {fp128} %0
+}
+
+; ALL-LABEL: ret_struct_fp128:
+
+; O32 generates different IR so we don't test it here. It returns the struct
+; indirectly.
+
+; Contrary to the N32/N64 ABI documentation, a struct containing a long double
+; is returned in $f0 and $f1 instead of the usual $f0 and $f2. This is to
+; match the de facto ABI as implemented by GCC.
+; N32-DAG:        lui [[R1:\$[0-9]+]], %hi(struct_fp128)
+; N32-DAG:        ld  [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
+; N32-DAG:        dmtc1 [[R2]], $f0
+; N32-DAG:        addiu [[R3:\$[0-9]+]], [[R1]], %lo(struct_fp128)
+; N32-DAG:        ld  [[R4:\$[0-9]+]], 8([[R3]])
+; N32-DAG:        dmtc1 [[R4]], $f1
+; The base register of the GOT load below is deliberately left unmatched.
+; N64-DAG:        ld  [[R1:\$[0-9]+]], %got_disp(struct_fp128)(
+; N64-DAG:        ld  [[R2:\$[0-9]+]], 0([[R1]])
+; N64-DAG:        dmtc1 [[R2]], $f0
+; N64-DAG:        ld  [[R4:\$[0-9]+]], 8([[R1]])
+; N64-DAG:        dmtc1 [[R4]], $f1
--- a/test/CodeGen/Mips/cconv/return-struct.ll
+++ b/test/CodeGen/Mips/cconv/return-struct.ll
@ -0,0 +1,232 @@
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-BE %s
+; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-LE %s
+
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-BE %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-LE %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-BE %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-LE %s
+
+; Test struct returns for all ABIs and byte orders.
+
+@struct_byte = global {i8} zeroinitializer
+@struct_2byte = global {i8,i8} zeroinitializer
+@struct_3xi16 = global {[3 x i16]} zeroinitializer
+@struct_6xi32 = global {[6 x i32]} zeroinitializer
+@struct_128xi16 = global {[128 x i16]} zeroinitializer
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+
+define inreg {i8} @ret_struct_i8() nounwind {
+entry:
+        %0 = load volatile {i8}* @struct_byte
+        ret {i8} %0
+}
+
+; ALL-LABEL: ret_struct_i8:
+; O32-DAG:           lui [[R1:\$[0-9]+]], %hi(struct_byte)
+; O32-DAG:           lbu $2, %lo(struct_byte)([[R1]])
+
+; N32-LE-DAG:        lui [[R1:\$[0-9]+]], %hi(struct_byte)
+; N32-LE-DAG:        lb $2, %lo(struct_byte)([[R1]])
+
+; N32-BE-DAG:        lui [[R1:\$[0-9]+]], %hi(struct_byte)
+; N32-BE-DAG:        lb [[R2:\$[0-9]+]], %lo(struct_byte)([[R1]])
+; N32-BE-DAG:        dsll $2, [[R2]], 56
+
+; N64-LE-DAG:        ld  [[R1:\$[0-9]+]], %got_disp(struct_byte)($1)
+; N64-LE-DAG:        lb $2, 0([[R1]])
+
+; N64-BE-DAG:        ld  [[R1:\$[0-9]+]], %got_disp(struct_byte)($1)
+; N64-BE-DAG:        lb [[R2:\$[0-9]+]], 0([[R1]])
+; N64-BE-DAG:        dsll $2, [[R2]], 56
+
+; This test is based on the way clang currently lowers {i8,i8} to {i16}.
+; FIXME: It should probably work without any lowering too but this doesn't
+;        work as expected. Each member gets mapped to a register rather than
+;        packed into a single register.
+define inreg {i16} @ret_struct_i16() nounwind {
+entry:
+        %retval = alloca {i8,i8}, align 1
+        %0 = bitcast {i8,i8}* %retval to i8*
+        call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds ({i8,i8}* @struct_2byte, i32 0, i32 0), i64 2, i32 1, i1 false)
+        %1 = bitcast {i8,i8}* %retval to {i16}*
+        %2 = load volatile {i16}* %1
+        ret {i16} %2
+}
+
+; ALL-LABEL: ret_struct_i16:
+; O32-DAG:           lui [[R1:\$[0-9]+]], %hi(struct_2byte)
+; O32-DAG:           lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]])
+; O32-DAG:           sh  [[R2]], 0([[SP:\$sp]])
+; O32-DAG:           lhu $2, 0([[SP:\$sp]])
+
+; N32-LE-DAG:        lui [[R1:\$[0-9]+]], %hi(struct_2byte)
+; N32-LE-DAG:        lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]])
+; N32-LE-DAG:        sh  [[R2]], 8([[SP:\$sp]])
+; N32-LE-DAG:        lh  $2, 8([[SP:\$sp]])
+
+; N32-BE-DAG:        lui [[R1:\$[0-9]+]], %hi(struct_2byte)
+; N32-BE-DAG:        lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]])
+; N32-BE-DAG:        sh  [[R2]], 8([[SP:\$sp]])
+; N32-BE-DAG:        lh  [[R3:\$[0-9]+]], 8([[SP:\$sp]])
+; N32-BE-DAG:        dsll $2, [[R3]], 48
+
+; N64-LE-DAG:        ld  [[R1:\$[0-9]+]], %got_disp(struct_2byte)($1)
+; N64-LE-DAG:        lhu [[R2:\$[0-9]+]], 0([[R1]])
+; N64-LE-DAG:        sh  [[R2]], 8([[SP:\$sp]])
+; N64-LE-DAG:        lh  $2, 8([[SP:\$sp]])
+
+; N64-BE-DAG:        ld  [[R1:\$[0-9]+]], %got_disp(struct_2byte)($1)
+; N64-BE-DAG:        lhu [[R2:\$[0-9]+]], 0([[R1]])
+; N64-BE-DAG:        sh  [[R2]], 8([[SP:\$sp]])
+; N64-BE-DAG:        lh  [[R3:\$[0-9]+]], 8([[SP:\$sp]])
+; N64-BE-DAG:        dsll $2, [[R3]], 48
+
+; Ensure that structures bigger than 32-bits but smaller than 64-bits are
+; also returned in the upper bits on big endian targets. Previously, these were
+; missed by the CCPromoteToType and the shift didn't happen.
+define inreg {i48} @ret_struct_3xi16() nounwind {
+entry:
+        %0 = load volatile i48* bitcast ({[3 x i16]}* @struct_3xi16 to i48*), align 2
+        %1 = insertvalue {i48} undef, i48 %0, 0
+        ret {i48} %1
+}
+
+; ALL-LABEL: ret_struct_3xi16:
+
+; O32-BE-DAG:        lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16)
+; O32-BE-DAG:        addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16)
+; O32-BE-DAG:        lhu [[R1:\$[0-9]+]], 4([[PTR_LO]])
+; O32-BE-DAG:        lw [[R2:\$[0-9]+]], %lo(struct_3xi16)([[PTR_HI]])
+; O32-BE-DAG:        sll [[R3:\$[0-9]+]], [[R2]], 16
+; O32-BE-DAG:        or  $3, [[R1]], [[R3]]
+; O32-BE-DAG:        srl $2, [[R2]], 16
+
+; O32-LE-DAG:        lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16)
+; O32-LE-DAG:        addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16)
+; O32-LE-DAG:        lhu $3, 4([[PTR_LO]])
+; O32-LE-DAG:        lw $2, %lo(struct_3xi16)([[PTR_HI]])
+
+; N32-LE-DAG:        lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16)
+; N32-LE-DAG:        addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16)
+; N32-LE-DAG:        lh [[R1:\$[0-9]+]], 4([[PTR_LO]])
+; N32-LE-DAG:        lwu [[R2:\$[0-9]+]], %lo(struct_3xi16)([[PTR_HI]])
+; N32-LE-DAG:        dsll [[R3:\$[0-9]+]], [[R1]], 32
+; N32-LE-DAG:        or $2, [[R2]], [[R3]]
+
+; N32-BE-DAG:        lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16)
+; N32-BE-DAG:        addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16)
+; N32-BE-DAG:        lw [[R1:\$[0-9]+]], %lo(struct_3xi16)([[PTR_HI]])
+; N32-BE-DAG:        dsll [[R2:\$[0-9]+]], [[R1]], 16
+; N32-BE-DAG:        lhu [[R3:\$[0-9]+]], 4([[PTR_LO]])
+; N32-BE-DAG:        or [[R4:\$[0-9]+]], [[R3]], [[R2]]
+; N32-BE-DAG:        dsll $2, [[R4]], 16
+
+; N64-LE-DAG:        ld  [[PTR:\$[0-9]+]], %got_disp(struct_3xi16)($1)
+; N64-LE-DAG:        lh [[R1:\$[0-9]+]], 4([[PTR]])
+; N64-LE-DAG:        lwu [[R2:\$[0-9]+]], 0([[PTR]])
+; N64-LE-DAG:        dsll [[R3:\$[0-9]+]], [[R1]], 32
+; N64-LE-DAG:        or $2, [[R2]], [[R3]]
+
+; N64-BE-DAG:        ld  [[PTR:\$[0-9]+]], %got_disp(struct_3xi16)($1)
+; N64-BE-DAG:        lw [[R1:\$[0-9]+]], 0([[PTR]])
+; N64-BE-DAG:        dsll [[R2:\$[0-9]+]], [[R1]], 16
+; N64-BE-DAG:        lhu [[R3:\$[0-9]+]], 4([[PTR]])
+; N64-BE-DAG:        or [[R4:\$[0-9]+]], [[R3]], [[R2]]
+; N64-BE-DAG:        dsll $2, [[R4]], 16
+
+; Ensure that large structures (>128-bit) are returned indirectly.
+; We pick an extremely large structure so we don't have to match inlined memcpy's.
+define void @ret_struct_128xi16({[128 x i16]}* sret %returnval) {
+entry:
+        %0 = bitcast {[128 x i16]}* %returnval to i8*
+        call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ({[128 x i16]}* @struct_128xi16 to i8*), i64 256, i32 2, i1 false)
+        ret void
+}
+
+; ALL-LABEL: ret_struct_128xi16:
+
+; sret pointer is already in $4
+; O32-DAG:        lui [[PTR:\$[0-9]+]], %hi(struct_128xi16)
+; O32-DAG:        addiu $5, [[PTR]], %lo(struct_128xi16)
+; O32:            jal memcpy
+
+; sret pointer is already in $4
+; N32-DAG:        lui [[PTR_HI:\$[0-9]+]], %hi(struct_128xi16)
+; N32-DAG:        addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(struct_128xi16)
+; FIXME: This signext isn't necessary. Like integers, pointers are sign-extended,
+;        but unlike integers, pointers cannot have the signext attribute.
+; N32-DAG:        sll $5, [[PTR]], 0
+; N32:            jal memcpy
+
+; sret pointer is already in $4
+; N64-DAG:        ld $5, %got_disp(struct_128xi16)(
+; N64-DAG:        ld $25, %call16(memcpy)(
+; N64:            jalr $25
+
+; Ensure that large structures (>128-bit) are returned indirectly.
+; This will generate inlined memcpy's anyway so pick the smallest large
+; structure
+; This time we let the backend lower the sret argument.
+define {[6 x i32]} @ret_struct_6xi32() {
+entry:
+        %0 = load volatile {[6 x i32]}* @struct_6xi32, align 2
+        ret {[6 x i32]} %0
+}
+
+; ALL-LABEL: ret_struct_6xi32:
+
+; sret pointer is already in $4
+; O32-DAG:        lui [[PTR_HI:\$[0-9]+]], %hi(struct_6xi32)
+; O32-DAG:        addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(struct_6xi32)
+; O32-DAG:        lw [[T0:\$[0-9]+]], %lo(struct_6xi32)([[PTR_HI]])
+; O32-DAG:        lw [[T1:\$[0-9]+]], 4([[PTR]])
+; O32-DAG:        lw [[T2:\$[0-9]+]], 8([[PTR]])
+; O32-DAG:        lw [[T3:\$[0-9]+]], 12([[PTR]])
+; O32-DAG:        lw [[T4:\$[0-9]+]], 16([[PTR]])
+; O32-DAG:        lw [[T5:\$[0-9]+]], 20([[PTR]])
+; O32-DAG:        sw [[T0]], 0($4)
+; O32-DAG:        sw [[T1]], 4($4)
+; O32-DAG:        sw [[T2]], 8($4)
+; O32-DAG:        sw [[T3]], 12($4)
+; O32-DAG:        sw [[T4]], 16($4)
+; O32-DAG:        sw [[T5]], 20($4)
+
+; FIXME: This signext isn't necessary. Like integers, pointers are sign-extended,
+;        but unlike integers, pointers cannot have the signext attribute.
+;        In this case we don't have anywhere to put the signext either since
+;        the sret argument is invented by the backend.
+; N32-DAG:        sll [[RET_PTR:\$[0-9]+]], $4, 0
+; N32-DAG:        lui [[PTR_HI:\$[0-9]+]], %hi(struct_6xi32)
+; N32-DAG:        addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(struct_6xi32)
+; N32-DAG:        lw [[T0:\$[0-9]+]], %lo(struct_6xi32)([[PTR_HI]])
+; N32-DAG:        lw [[T1:\$[0-9]+]], 4([[PTR]])
+; N32-DAG:        lw [[T2:\$[0-9]+]], 8([[PTR]])
+; N32-DAG:        lw [[T3:\$[0-9]+]], 12([[PTR]])
+; N32-DAG:        lw [[T4:\$[0-9]+]], 16([[PTR]])
+; N32-DAG:        lw [[T5:\$[0-9]+]], 20([[PTR]])
+; N32-DAG:        sw [[T0]], 0([[RET_PTR]])
+; N32-DAG:        sw [[T1]], 4([[RET_PTR]])
+; N32-DAG:        sw [[T2]], 8([[RET_PTR]])
+; N32-DAG:        sw [[T3]], 12([[RET_PTR]])
+; N32-DAG:        sw [[T4]], 16([[RET_PTR]])
+; N32-DAG:        sw [[T5]], 20([[RET_PTR]])
+
+; sret pointer is already in $4
+; N64-DAG:        ld [[PTR:\$[0-9]+]], %got_disp(struct_6xi32)(
+; N64-DAG:        lw [[T0:\$[0-9]+]], 0([[PTR]])
+; N64-DAG:        lw [[T1:\$[0-9]+]], 4([[PTR]])
+; N64-DAG:        lw [[T2:\$[0-9]+]], 8([[PTR]])
+; N64-DAG:        lw [[T3:\$[0-9]+]], 12([[PTR]])
+; N64-DAG:        lw [[T4:\$[0-9]+]], 16([[PTR]])
+; N64-DAG:        lw [[T5:\$[0-9]+]], 20([[PTR]])
+; N64-DAG:        sw [[T0]], 0($4)
+; N64-DAG:        sw [[T1]], 4($4)
+; N64-DAG:        sw [[T2]], 8($4)
+; N64-DAG:        sw [[T3]], 12($4)
+; N64-DAG:        sw [[T4]], 16($4)
+; N64-DAG:        sw [[T5]], 20($4)
--- a/test/CodeGen/Mips/cconv/return.ll
+++ b/test/CodeGen/Mips/cconv/return.ll
@ -33,7 +33,7 @@ entry:
 ; O32-DAG:           lbu $2, %lo(byte)([[R1]])
 ; N32-DAG:           lui [[R1:\$[0-9]+]], %hi(byte)
 ; N32-DAG:           lbu $2, %lo(byte)([[R1]])
-; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(byte)($1)
+; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(byte)(
 ; N64-DAG:           lbu $2, 0([[R1]])

 define i32 @reti32() nounwind {
@ -47,7 +47,7 @@ entry:
 ; O32-DAG:           lw $2, %lo(word)([[R1]])
 ; N32-DAG:           lui [[R1:\$[0-9]+]], %hi(word)
 ; N32-DAG:           lw $2, %lo(word)([[R1]])
-; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(word)($1)
+; N64-DAG:           ld  [[R1:\$[0-9]+]], %got_disp(word)(
 ; N64-DAG:           lw $2, 0([[R1]])

 define i64 @reti64() nounwind {
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@ -38,7 +38,7 @@
 ; 64-CMP-DAG:   or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
 ; 64-CMP-DAG:   ld $2, 0($[[T2]])

-define i32* @cmov1(i32 %s) nounwind readonly {
+define i32* @cmov1(i32 signext %s) nounwind readonly {
 entry:
  %tobool = icmp ne i32 %s, 0
  %tmp1 = load i32** @i3, align 4
@ -78,7 +78,7 @@ entry:
 ; 64-CMP-DAG:   or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
 ; 64-CMP-DAG:   lw $2, 0($[[T2]])

-define i32 @cmov2(i32 %s) nounwind readonly {
+define i32 @cmov2(i32 signext %s) nounwind readonly {
 entry:
  %tobool = icmp ne i32 %s, 0
  %tmp1 = load i32* @c, align 4
@ -109,7 +109,7 @@ entry:
 ; 64-CMP-DAG:   selnez $[[T1:[0-9]+]], $6, $[[CC]]
 ; 64-CMP-DAG:   or $2, $[[T0]], $[[T1]]

-define i32 @cmov3(i32 %a, i32 %b, i32 %c) nounwind readnone {
+define i32 @cmov3(i32 signext %a, i32 signext %b, i32 signext %c) nounwind readnone {
 entry:
  %cmp = icmp eq i32 %a, 234
  %cond = select i1 %cmp, i32 %b, i32 %c
@ -142,7 +142,7 @@ entry:
 ; 64-CMP-DAG:   seleqz $[[T1:[0-9]+]], $6, $[[CC]]
 ; 64-CMP-DAG:   or $2, $[[T0]], $[[T1]]

-define i32 @cmov3_ne(i32 %a, i32 %b, i32 %c) nounwind readnone {
+define i32 @cmov3_ne(i32 signext %a, i32 signext %b, i32 signext %c) nounwind readnone {
 entry:
  %cmp = icmp ne i32 %a, 234
  %cond = select i1 %cmp, i32 %b, i32 %c
@ -179,7 +179,7 @@ entry:
 ; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $6, $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]

-define i64 @cmov4(i32 %a, i64 %b, i64 %c) nounwind readnone {
+define i64 @cmov4(i32 signext %a, i64 %b, i64 %c) nounwind readnone {
 entry:
  %cmp = icmp eq i32 %a, 234
  %cond = select i1 %cmp, i64 %b, i64 %c
@ -220,7 +220,7 @@ entry:
 ; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $6, $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]

-define i64 @cmov4_ne(i32 %a, i64 %b, i64 %c) nounwind readnone {
+define i64 @cmov4_ne(i32 signext %a, i64 %b, i64 %c) nounwind readnone {
 entry:
  %cmp = icmp ne i32 %a, 234
  %cond = select i1 %cmp, i64 %b, i64 %c
@ -263,7 +263,7 @@ entry:
 ; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]

-define i32 @slti0(i32 %a) {
+define i32 @slti0(i32 signext %a) {
 entry:
  %cmp = icmp sgt i32 %a, 32766
  %cond = select i1 %cmp, i32 3, i32 5
@ -302,7 +302,7 @@ entry:
 ; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]

-define i32 @slti1(i32 %a) {
+define i32 @slti1(i32 signext %a) {
 entry:
  %cmp = icmp sgt i32 %a, 32767
  %cond = select i1 %cmp, i32 7, i32 5
@ -337,7 +337,7 @@ entry:
 ; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]

-define i32 @slti2(i32 %a) {
+define i32 @slti2(i32 signext %a) {
 entry:
  %cmp = icmp sgt i32 %a, -32769
  %cond = select i1 %cmp, i32 3, i32 5
@ -380,7 +380,7 @@ entry:
 ; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]

-define i32 @slti3(i32 %a) {
+define i32 @slti3(i32 signext %a) {
 entry:
  %cmp = icmp sgt i32 %a, -32770
  %cond = select i1 %cmp, i32 3, i32 5
@ -567,7 +567,7 @@ entry:
 ; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]

-define i32 @sltiu0(i32 %a) {
+define i32 @sltiu0(i32 signext %a) {
 entry:
  %cmp = icmp ugt i32 %a, 32766
  %cond = select i1 %cmp, i32 3, i32 5
@ -606,7 +606,7 @@ entry:
 ; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]

-define i32 @sltiu1(i32 %a) {
+define i32 @sltiu1(i32 signext %a) {
 entry:
  %cmp = icmp ugt i32 %a, 32767
  %cond = select i1 %cmp, i32 7, i32 5
@ -641,7 +641,7 @@ entry:
 ; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]

-define i32 @sltiu2(i32 %a) {
+define i32 @sltiu2(i32 signext %a) {
 entry:
  %cmp = icmp ugt i32 %a, -32769
  %cond = select i1 %cmp, i32 3, i32 5
@ -684,7 +684,7 @@ entry:
 ; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]

-define i32 @sltiu3(i32 %a) {
+define i32 @sltiu3(i32 signext %a) {
 entry:
  %cmp = icmp ugt i32 %a, -32770
  %cond = select i1 %cmp, i32 3, i32 5
@ -697,7 +697,7 @@ entry:
 ; doesn't generate conditional moves
 ; for constant operands whose difference is |1|

-define i32 @slti4(i32 %a) nounwind readnone {
+define i32 @slti4(i32 signext %a) nounwind readnone {
  %1 = icmp slt i32 %a, 7
  %2 = select i1 %1, i32 4, i32 3
  ret i32 %2
@ -723,7 +723,7 @@ define i32 @slti4(i32 %a) nounwind readnone {
 ; 64-CMP-NOT:  seleqz
 ; 64-CMP-NOT:  selnez

-define i32 @slti5(i32 %a) nounwind readnone {
+define i32 @slti5(i32 signext %a) nounwind readnone {
  %1 = icmp slt i32 %a, 7
  %2 = select i1 %1, i32 -3, i32 -4
  ret i32 %2
@ -749,7 +749,7 @@ define i32 @slti5(i32 %a) nounwind readnone {
 ; 64-CMP-NOT:  seleqz
 ; 64-CMP-NOT:  selnez

-define i32 @slti6(i32 %a) nounwind readnone {
+define i32 @slti6(i32 signext %a) nounwind readnone {
  %1 = icmp slt i32 %a, 7
  %2 = select i1 %1, i32 3, i32 4
  ret i32 %2
--- a/test/CodeGen/Mips/const-mult.ll
+++ b/test/CodeGen/Mips/const-mult.ll
@ -5,7 +5,7 @@
 ; CHECK: sll $[[R0:[0-9]+]], $4, 2
 ; CHECK: addu ${{[0-9]+}}, $[[R0]], $4

-define i32 @mul5_32(i32 %a) {
+define i32 @mul5_32(i32 signext %a) {
 entry:
  %mul = mul nsw i32 %a, 5
  ret i32 %mul
@ -17,7 +17,7 @@ entry:
 ; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 5
 ; CHECK:     subu ${{[0-9]+}}, $[[R2]], $[[R1]]

-define i32 @mul27_32(i32 %a) {
+define i32 @mul27_32(i32 signext %a) {
 entry:
  %mul = mul nsw i32 %a, 27
  ret i32 %mul
@ -29,7 +29,7 @@ entry:
 ; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 31
 ; CHECK:     addu ${{[0-9]+}}, $[[R2]], $[[R1]]

-define i32 @muln2147483643_32(i32 %a) {
+define i32 @muln2147483643_32(i32 signext %a) {
 entry:
  %mul = mul nsw i32 %a, -2147483643
  ret i32 %mul
@ -41,7 +41,7 @@ entry:
 ; CHECK64-DAG: dsll $[[R2:[0-9]+]], $4, 63
 ; CHECK64:     daddu ${{[0-9]+}}, $[[R2]], $[[R1]]

-define i64 @muln9223372036854775805_64(i64 %a) {
+define i64 @muln9223372036854775805_64(i64 signext %a) {
 entry:
  %mul = mul nsw i64 %a, -9223372036854775805
  ret i64 %mul
--- a/test/CodeGen/Mips/countleading.ll
+++ b/test/CodeGen/Mips/countleading.ll
@ -11,7 +11,7 @@
 ;   MIPS32-GT-R1 - MIPS64r1 and above (does not include MIPS64's)
 ;   MIPS64-GT-R1 - MIPS64r1 and above

-define i32 @ctlz_i32(i32 %X) nounwind readnone {
+define i32 @ctlz_i32(i32 signext %X) nounwind readnone {
 entry:
 ; ALL-LABEL: ctlz_i32:

@ -27,7 +27,7 @@ entry:

 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone

-define i32 @ctlo_i32(i32 %X) nounwind readnone {
+define i32 @ctlo_i32(i32 signext %X) nounwind readnone {
 entry:
 ; ALL-LABEL: ctlo_i32:

--- a/test/CodeGen/Mips/divrem.ll
+++ b/test/CodeGen/Mips/divrem.ll
@ -27,7 +27,7 @@
@g0 = common global i32 0, align 4
@g1 = common global i32 0, align 4

-define i32 @sdiv1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @sdiv1(i32 signext %a0, i32 signext %a1) nounwind readnone {
 entry:
 ; ALL-LABEL: sdiv1:

@ -54,7 +54,7 @@ entry:
  ret i32 %div
 }

-define i32 @srem1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @srem1(i32 signext %a0, i32 signext %a1) nounwind readnone {
 entry:
 ; ALL-LABEL: srem1:

@ -81,7 +81,7 @@ entry:
  ret i32 %rem
 }

-define i32 @udiv1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @udiv1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone {
 entry:
 ; ALL-LABEL: udiv1:

@ -107,7 +107,7 @@ entry:
  ret i32 %div
 }

-define i32 @urem1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @urem1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone {
 entry:
 ; ALL-LABEL: urem1:

@ -134,7 +134,7 @@ entry:
  ret i32 %rem
 }

-define i32 @sdivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
+define i32 @sdivrem1(i32 signext %a0, i32 signext %a1, i32* nocapture %r) nounwind {
 entry:
 ; ALL-LABEL: sdivrem1:

@ -175,7 +175,7 @@ entry:
  ret i32 %div
 }

-define i32 @udivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
+define i32 @udivrem1(i32 zeroext %a0, i32 zeroext %a1, i32* nocapture %r) nounwind {
 entry:
 ; ALL-LABEL: udivrem1:

--- a/test/CodeGen/Mips/ehframe-indirect.ll
+++ b/test/CodeGen/Mips/ehframe-indirect.ll
@ -1,5 +1,7 @@
-; RUN: llc -mtriple=mipsel-linux-gnu < %s | FileCheck %s
-; RUN: llc -mtriple=mipsel-linux-android < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-gnu < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK32 %s
+; RUN: llc -mtriple=mipsel-linux-android < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK64 %s
+; RUN: llc -mtriple=mips64el-linux-android < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK64 %s

 define i32 @main() {
 ; CHECK: .cfi_startproc
@ -27,8 +29,11 @@ declare void @foo()
 ; CHECK: .hidden DW.ref.__gxx_personality_v0
 ; CHECK: .weak DW.ref.__gxx_personality_v0
 ; CHECK: .section .data.DW.ref.__gxx_personality_v0,"aGw",@progbits,DW.ref.__gxx_personality_v0,comdat
-; CHECK: .align 2
+; CHECK32: .align 2
+; CHECK64: .align 3
 ; CHECK: .type DW.ref.__gxx_personality_v0,@object
-; CHECK: .size DW.ref.__gxx_personality_v0, 4
+; CHECK32: .size DW.ref.__gxx_personality_v0, 4
+; CHECK64: .size DW.ref.__gxx_personality_v0, 8
 ; CHECK: DW.ref.__gxx_personality_v0:
-; CHECK: .4byte __gxx_personality_v0
+; CHECK32: .4byte __gxx_personality_v0
+; CHECK64: .8byte __gxx_personality_v0
--- a/test/CodeGen/Mips/fastcc.ll
+++ b/test/CodeGen/Mips/fastcc.ll
@ -2,6 +2,7 @@
 ; RUN: llc  < %s -mtriple=mipsel-none-nacl-gnu \
 ; RUN:  | FileCheck %s -check-prefix=CHECK-NACL
 ; RUN: llc  < %s -march=mipsel -mcpu=mips32 -mattr=+nooddspreg | FileCheck %s -check-prefix=NOODDSPREG
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 -mattr=+fp64,+nooddspreg | FileCheck %s -check-prefix=FP64-NOODDSPREG


@gi0 = external global i32
@ -82,6 +83,7 @@
@g16 = external global i32

@fa = common global [11 x float] zeroinitializer, align 4
+@da = common global [11 x double] zeroinitializer, align 8

 define void @caller0() nounwind {
 entry:
@ -270,7 +272,7 @@ entry:
 define void @caller2() {
 entry:

-; NOODDSPREG-LABEL:  caller2
+; NOODDSPREG-LABEL:  caller2:

 ; Check that first 10 arguments are passed in even float registers
 ; f0, f2, ... , f18. Check that 11th argument is passed on stack.
@ -312,7 +314,7 @@ define fastcc void @callee2(float %a0, float %a1, float %a2, float %a3,
                            float %a8, float %a9, float %a10) {
 entry:

-; NOODDSPREG-LABEL:  callee2
+; NOODDSPREG-LABEL:  callee2:

 ; NOODDSPREG:        addiu   $sp, $sp, -[[OFFSET:[0-9]+]]

@ -348,3 +350,83 @@ entry:
  ret void
 }

+define void @caller3() {
+entry:
+
+; FP64-NOODDSPREG-LABEL:  caller3:
+
+; Check that first 10 arguments are passed in even float registers
+; f0, f2, ... , f18. Check that 11th argument is passed on stack.
+
+; FP64-NOODDSPREG-DAG:    lw      $[[R0:[0-9]+]], %got(da)(${{[0-9]+|gp}})
+; FP64-NOODDSPREG-DAG:    ldc1    $f0, 0($[[R0]])
+; FP64-NOODDSPREG-DAG:    ldc1    $f2, 8($[[R0]])
+; FP64-NOODDSPREG-DAG:    ldc1    $f4, 16($[[R0]])
+; FP64-NOODDSPREG-DAG:    ldc1    $f6, 24($[[R0]])
+; FP64-NOODDSPREG-DAG:    ldc1    $f8, 32($[[R0]])
+; FP64-NOODDSPREG-DAG:    ldc1    $f10, 40($[[R0]])
+; FP64-NOODDSPREG-DAG:    ldc1    $f12, 48($[[R0]])
+; FP64-NOODDSPREG-DAG:    ldc1    $f14, 56($[[R0]])
+; FP64-NOODDSPREG-DAG:    ldc1    $f16, 64($[[R0]])
+; FP64-NOODDSPREG-DAG:    ldc1    $f18, 72($[[R0]])
+
+; FP64-NOODDSPREG-DAG:    ldc1    $[[F0:f[0-9]*[02468]]], 80($[[R0]])
+; FP64-NOODDSPREG-DAG:    sdc1    $[[F0]], 0($sp)
+
+  %0 = load double* getelementptr ([11 x double]* @da, i32 0, i32 0), align 8
+  %1 = load double* getelementptr ([11 x double]* @da, i32 0, i32 1), align 8
+  %2 = load double* getelementptr ([11 x double]* @da, i32 0, i32 2), align 8
+  %3 = load double* getelementptr ([11 x double]* @da, i32 0, i32 3), align 8
+  %4 = load double* getelementptr ([11 x double]* @da, i32 0, i32 4), align 8
+  %5 = load double* getelementptr ([11 x double]* @da, i32 0, i32 5), align 8
+  %6 = load double* getelementptr ([11 x double]* @da, i32 0, i32 6), align 8
+  %7 = load double* getelementptr ([11 x double]* @da, i32 0, i32 7), align 8
+  %8 = load double* getelementptr ([11 x double]* @da, i32 0, i32 8), align 8
+  %9 = load double* getelementptr ([11 x double]* @da, i32 0, i32 9), align 8
+  %10 = load double* getelementptr ([11 x double]* @da, i32 0, i32 10), align 8
+  tail call fastcc void @callee3(double %0, double %1, double %2, double %3,
+                                 double %4, double %5, double %6, double %7,
+                                 double %8, double %9, double %10)
+  ret void
+}
+
+define fastcc void @callee3(double %a0, double %a1, double %a2, double %a3,
+                            double %a4, double %a5, double %a6, double %a7,
+                            double %a8, double %a9, double %a10) {
+entry:
+
+; FP64-NOODDSPREG-LABEL:  callee3:
+
+; FP64-NOODDSPREG:        addiu   $sp, $sp, -[[OFFSET:[0-9]+]]
+
+; Check that first 10 arguments are received in even float registers
+; f0, f2, ... , f18. Check that 11th argument is received on stack.
+
+; FP64-NOODDSPREG-DAG:    lw      $[[R0:[0-9]+]], %got(da)(${{[0-9]+|gp}})
+; FP64-NOODDSPREG-DAG:    sdc1    $f0, 0($[[R0]])
+; FP64-NOODDSPREG-DAG:    sdc1    $f2, 8($[[R0]])
+; FP64-NOODDSPREG-DAG:    sdc1    $f4, 16($[[R0]])
+; FP64-NOODDSPREG-DAG:    sdc1    $f6, 24($[[R0]])
+; FP64-NOODDSPREG-DAG:    sdc1    $f8, 32($[[R0]])
+; FP64-NOODDSPREG-DAG:    sdc1    $f10, 40($[[R0]])
+; FP64-NOODDSPREG-DAG:    sdc1    $f12, 48($[[R0]])
+; FP64-NOODDSPREG-DAG:    sdc1    $f14, 56($[[R0]])
+; FP64-NOODDSPREG-DAG:    sdc1    $f16, 64($[[R0]])
+; FP64-NOODDSPREG-DAG:    sdc1    $f18, 72($[[R0]])
+
+; FP64-NOODDSPREG-DAG:    ldc1    $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp)
+; FP64-NOODDSPREG-DAG:    sdc1    $[[F0]], 80($[[R0]])
+
+  store double %a0, double* getelementptr ([11 x double]* @da, i32 0, i32 0), align 8
+  store double %a1, double* getelementptr ([11 x double]* @da, i32 0, i32 1), align 8
+  store double %a2, double* getelementptr ([11 x double]* @da, i32 0, i32 2), align 8
+  store double %a3, double* getelementptr ([11 x double]* @da, i32 0, i32 3), align 8
+  store double %a4, double* getelementptr ([11 x double]* @da, i32 0, i32 4), align 8
+  store double %a5, double* getelementptr ([11 x double]* @da, i32 0, i32 5), align 8
+  store double %a6, double* getelementptr ([11 x double]* @da, i32 0, i32 6), align 8
+  store double %a7, double* getelementptr ([11 x double]* @da, i32 0, i32 7), align 8
+  store double %a8, double* getelementptr ([11 x double]* @da, i32 0, i32 8), align 8
+  store double %a9, double* getelementptr ([11 x double]* @da, i32 0, i32 9), align 8
+  store double %a10, double* getelementptr ([11 x double]* @da, i32 0, i32 10), align 8
+  ret void
+}
--- a/test/CodeGen/Mips/fp64a.ll
+++ b/test/CodeGen/Mips/fp64a.ll
@ -12,9 +12,9 @@
 ;        this check here.

 ; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-BE
-; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-BE
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A
 ; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-LE
-; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-LE
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A

 ; RUN: llc -march=mips64 -mcpu=mips64 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-NO-FP64A
 ; RUN: not llc -march=mips64 -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A
@ -38,15 +38,10 @@ define double @call1(double %d, ...) {
 ; 32R2-NO-FP64A-BE:         mtc1    $5, $f0
 ; 32R2-NO-FP64A-BE:         mthc1   $4, $f0

-; 32R2-FP64A-LE:            addiu   $sp, $sp, -8
-; 32R2-FP64A-LE:            sw      $4, 0($sp)
-; 32R2-FP64A-LE:            sw      $5, 4($sp)
-; 32R2-FP64A-LE:            ldc1    $f0, 0($sp)
-
-; 32R2-FP64A-BE:            addiu   $sp, $sp, -8
-; 32R2-FP64A-BE:            sw      $5, 0($sp)
-; 32R2-FP64A-BE:            sw      $4, 4($sp)
-; 32R2-FP64A-BE:            ldc1    $f0, 0($sp)
+; 32R2-FP64A:               addiu   $sp, $sp, -8
+; 32R2-FP64A:               sw      $4, 0($sp)
+; 32R2-FP64A:               sw      $5, 4($sp)
+; 32R2-FP64A:               ldc1    $f0, 0($sp)

 ; 64-NO-FP64A:              daddiu  $sp, $sp, -64
 ; 64-NO-FP64A:              mov.d   $f0, $f12
@ -63,15 +58,10 @@ define double @call2(i32 %i, double %d) {
 ; 32R2-NO-FP64A-BE:     mtc1    $7, $f0
 ; 32R2-NO-FP64A-BE:     mthc1   $6, $f0

-; 32R2-FP64A-LE:        addiu   $sp, $sp, -8
-; 32R2-FP64A-LE:        sw      $6, 0($sp)
-; 32R2-FP64A-LE:        sw      $7, 4($sp)
-; 32R2-FP64A-LE:        ldc1    $f0, 0($sp)
-
-; 32R2-FP64A-BE:        addiu   $sp, $sp, -8
-; 32R2-FP64A-BE:        sw      $7, 0($sp)
-; 32R2-FP64A-BE:        sw      $6, 4($sp)
-; 32R2-FP64A-BE:        ldc1    $f0, 0($sp)
+; 32R2-FP64A:           addiu   $sp, $sp, -8
+; 32R2-FP64A:           sw      $6, 0($sp)
+; 32R2-FP64A:           sw      $7, 4($sp)
+; 32R2-FP64A:           ldc1    $f0, 0($sp)

 ; 64-NO-FP64A-NOT:      daddiu  $sp, $sp
 ; 64-NO-FP64A:          mov.d   $f0, $f13
@ -88,15 +78,10 @@ define double @call3(float %f1, float %f2, double %d) {
 ; 32R2-NO-FP64A-BE:     mtc1    $7, $f0
 ; 32R2-NO-FP64A-BE:     mthc1   $6, $f0

-; 32R2-FP64A-LE:        addiu   $sp, $sp, -8
-; 32R2-FP64A-LE:        sw      $6, 0($sp)
-; 32R2-FP64A-LE:        sw      $7, 4($sp)
-; 32R2-FP64A-LE:        ldc1    $f0, 0($sp)
-
-; 32R2-FP64A-BE:        addiu   $sp, $sp, -8
-; 32R2-FP64A-BE:        sw      $7, 0($sp)
-; 32R2-FP64A-BE:        sw      $6, 4($sp)
-; 32R2-FP64A-BE:        ldc1    $f0, 0($sp)
+; 32R2-FP64A:           addiu   $sp, $sp, -8
+; 32R2-FP64A:           sw      $6, 0($sp)
+; 32R2-FP64A:           sw      $7, 4($sp)
+; 32R2-FP64A:           ldc1    $f0, 0($sp)

 ; 64-NO-FP64A-NOT:      daddiu  $sp, $sp
 ; 64-NO-FP64A:          mov.d   $f0, $f14
@ -113,15 +98,10 @@ define double @call4(float %f, double %d, ...) {
 ; 32R2-NO-FP64A-BE:     mtc1    $7, $f0
 ; 32R2-NO-FP64A-BE:     mthc1   $6, $f0

-; 32R2-FP64A-LE:        addiu   $sp, $sp, -8
-; 32R2-FP64A-LE:        sw      $6, 0($sp)
-; 32R2-FP64A-LE:        sw      $7, 4($sp)
-; 32R2-FP64A-LE:        ldc1    $f0, 0($sp)
-
-; 32R2-FP64A-BE:        addiu   $sp, $sp, -8
-; 32R2-FP64A-BE:        sw      $7, 0($sp)
-; 32R2-FP64A-BE:        sw      $6, 4($sp)
-; 32R2-FP64A-BE:        ldc1    $f0, 0($sp)
+; 32R2-FP64A:           addiu   $sp, $sp, -8
+; 32R2-FP64A:           sw      $6, 0($sp)
+; 32R2-FP64A:           sw      $7, 4($sp)
+; 32R2-FP64A:           ldc1    $f0, 0($sp)

 ; 64-NO-FP64A:          daddiu  $sp, $sp, -48
 ; 64-NO-FP64A:          mov.d   $f0, $f13
@ -145,23 +125,14 @@ define double @call5(double %a, double %b, ...) {
 ; 32R2-NO-FP64A-BE-DAG:     mthc1   $6, $[[T1:f[0-9]+]]
 ; 32R2-NO-FP64A-BE:         sub.d   $f0, $[[T0]], $[[T1]]

-; 32R2-FP64A-LE:            addiu   $sp, $sp, -8
-; 32R2-FP64A-LE:            sw      $6, 0($sp)
-; 32R2-FP64A-LE:            sw      $7, 4($sp)
-; 32R2-FP64A-LE:            ldc1    $[[T1:f[0-9]+]], 0($sp)
-; 32R2-FP64A-LE:            sw      $4, 0($sp)
-; 32R2-FP64A-LE:            sw      $5, 4($sp)
-; 32R2-FP64A-LE:            ldc1    $[[T0:f[0-9]+]], 0($sp)
-; 32R2-FP64A-LE:            sub.d   $f0, $[[T0]], $[[T1]]
-
-; 32R2-FP64A-BE:            addiu   $sp, $sp, -8
-; 32R2-FP64A-BE:            sw      $7, 0($sp)
-; 32R2-FP64A-BE:            sw      $6, 4($sp)
-; 32R2-FP64A-BE:            ldc1    $[[T1:f[0-9]+]], 0($sp)
-; 32R2-FP64A-BE:            sw      $5, 0($sp)
-; 32R2-FP64A-BE:            sw      $4, 4($sp)
-; 32R2-FP64A-BE:            ldc1    $[[T0:f[0-9]+]], 0($sp)
-; 32R2-FP64A-BE:            sub.d   $f0, $[[T0]], $[[T1]]
+; 32R2-FP64A:               addiu   $sp, $sp, -8
+; 32R2-FP64A:               sw      $6, 0($sp)
+; 32R2-FP64A:               sw      $7, 4($sp)
+; 32R2-FP64A:               ldc1    $[[T1:f[0-9]+]], 0($sp)
+; 32R2-FP64A:               sw      $4, 0($sp)
+; 32R2-FP64A:               sw      $5, 4($sp)
+; 32R2-FP64A:               ldc1    $[[T0:f[0-9]+]], 0($sp)
+; 32R2-FP64A:               sub.d   $f0, $[[T0]], $[[T1]]

 ; 64-NO-FP64A:              sub.d   $f0, $f12, $f13
 }
@ -179,19 +150,12 @@ define double @move_from(double %d) {
 ; 32R2-NO-FP64A-BE-DAG: mfc1    $7, $f0
 ; 32R2-NO-FP64A-BE-DAG: mfhc1   $6, $f0

-; 32R2-FP64A-LE:        addiu   $sp, $sp, -32
-; 32R2-FP64A-LE:        sdc1    $f0, 16($sp)
-; 32R2-FP64A-LE:        lw      $6, 16($sp)
+; 32R2-FP64A:           addiu   $sp, $sp, -32
+; 32R2-FP64A:           sdc1    $f0, 16($sp)
+; 32R2-FP64A:           lw      $6, 16($sp)
 ; FIXME: This store is redundant
-; 32R2-FP64A-LE:        sdc1    $f0, 16($sp)
-; 32R2-FP64A-LE:        lw      $7, 20($sp)
-
-; 32R2-FP64A-BE:        addiu   $sp, $sp, -32
-; 32R2-FP64A-BE:        sdc1    $f0, 16($sp)
-; 32R2-FP64A-BE:        lw      $6, 20($sp)
-; FIXME: This store is redundant
-; 32R2-FP64A-BE:        sdc1    $f0, 16($sp)
-; 32R2-FP64A-BE:        lw      $7, 16($sp)
+; 32R2-FP64A:           sdc1    $f0, 16($sp)
+; 32R2-FP64A:           lw      $7, 20($sp)

 ; 64-NO-FP64A:          mov.d   $f13, $f0
 }
--- a/test/CodeGen/Mips/inlineasm-operand-code.ll
+++ b/test/CodeGen/Mips/inlineasm-operand-code.ll
@ -65,6 +65,33 @@ entry:
 ;CHECK_LITTLE_32:    addiu ${{[0-9]+}},${{[0-9]+}},$0
 ;CHECK_LITTLE_32:    #NO_APP
  tail call i32 asm sideeffect "addiu $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind
+
+; z with non-zero and the "r"(register) and "J"(integer zero) constraints
+;CHECK_LITTLE_32:    #APP
+;CHECK_LITTLE_32:    mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
+;CHECK_LITTLE_32:    #NO_APP
+  call void asm sideeffect "mtc0 ${0:z}, $$12", "Jr"(i32 7) nounwind
+
+; z with zero and the "r"(register) and "J"(integer zero) constraints
+;CHECK_LITTLE_32:    #APP
+;CHECK_LITTLE_32:    mtc0 $0, ${{[0-9]+}}
+;CHECK_LITTLE_32:    #NO_APP
+  call void asm sideeffect "mtc0 ${0:z}, $$12", "Jr"(i32 0) nounwind
+
+; z with non-zero and just the "r"(register) constraint
+;CHECK_LITTLE_32:    #APP
+;CHECK_LITTLE_32:    mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
+;CHECK_LITTLE_32:    #NO_APP
+  call void asm sideeffect "mtc0 ${0:z}, $$12", "r"(i32 7) nounwind
+
+; z with zero and just the "r"(register) constraint
+; FIXME: Check for $0, instead of other registers.
+;        We should be using $0 directly in this case, not real registers.
+;        When the materialization of 0 gets fixed, this test will fail.
+;CHECK_LITTLE_32:    #APP
+;CHECK_LITTLE_32:    mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
+;CHECK_LITTLE_32:    #NO_APP
+  call void asm sideeffect "mtc0 ${0:z}, $$12", "r"(i32 0) nounwind
  ret i32 0
 }

--- a/test/CodeGen/Mips/load-store-left-right.ll
+++ b/test/CodeGen/Mips/load-store-left-right.ll
@ -47,7 +47,7 @@ entry:
  ret i32 %0
 }

-define void @store_SI(i32 %a) nounwind {
+define void @store_SI(i32 signext %a) nounwind {
 entry:
 ; ALL-LABEL: store_SI:

@ -201,7 +201,7 @@ entry:
  ret void
 }

-define void @store_SI_trunc_from_i64(i32 %a) nounwind {
+define void @store_SI_trunc_from_i64(i32 signext %a) nounwind {
 entry:
 ; ALL-LABEL: store_SI_trunc_from_i64:

--- a/test/CodeGen/Mips/longbranch.ll
+++ b/test/CodeGen/Mips/longbranch.ll
@ -13,7 +13,7 @@

@x = external global i32

-define void @test1(i32 %s) {
+define void @test1(i32 signext %s) {
 entry:
  %cmp = icmp eq i32 %s, 0
  br i1 %cmp, label %end, label %then
--- a/test/CodeGen/Mips/madd-msub.ll
+++ b/test/CodeGen/Mips/madd-msub.ll
@ -76,26 +76,14 @@ entry:
 ; 32R6-DAG:      muhu $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}}
 ; 32R6-DAG:      addu $2, $[[T3]], $[[T2]]

-; 64-DAG:        dsll $[[T0:[0-9]+]], $4, 32
-; 64-DAG:        dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64-DAG:        dsll $[[T2:[0-9]+]], $5, 32
-; 64-DAG:        dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64-DAG:        d[[m:m]]ult $[[T3]], $[[T1]]
-; 64-DAG:        [[m]]flo $[[T4:[0-9]+]]
-; 64-DAG:        dsll $[[T5:[0-9]+]], $6, 32
-; 64-DAG:        dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64-DAG:        daddu $2, $[[T4]], $[[T6]]
+; 64-DAG:        d[[m:m]]ult $5, $4
+; 64-DAG:        [[m]]flo $[[T0:[0-9]+]]
+; 64-DAG:        daddu $2, $[[T0]], $6

-; 64R6-DAG:      dsll $[[T0:[0-9]+]], $4, 32
-; 64R6-DAG:      dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64R6-DAG:      dsll $[[T2:[0-9]+]], $5, 32
-; 64R6-DAG:      dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64R6-DAG:      dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]]
-; 64R6-DAG:      dsll $[[T5:[0-9]+]], $6, 32
-; 64R6-DAG:      dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64R6-DAG:      daddu $2, $[[T4]], $[[T6]]
+; 64R6-DAG:      dmul $[[T0:[0-9]+]], $5, $4
+; 64R6-DAG:      daddu $2, $[[T0]], $6

-define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone {
+define i64 @madd2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readnone {
 entry:
  %conv = zext i32 %a to i64
  %conv2 = zext i32 %b to i64
@ -214,26 +202,14 @@ entry:
 ; 32R6-DAG:      negu $2, $[[T3]]
 ; 32R6-DAG:      subu $3, $6, $[[T1]]

-; 64-DAG:        dsll $[[T0:[0-9]+]], $4, 32
-; 64-DAG:        dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64-DAG:        dsll $[[T2:[0-9]+]], $5, 32
-; 64-DAG:        dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64-DAG:        d[[m:m]]ult $[[T3]], $[[T1]]
-; 64-DAG:        [[m]]flo $[[T4:[0-9]+]]
-; 64-DAG:        dsll $[[T5:[0-9]+]], $6, 32
-; 64-DAG:        dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64-DAG:        dsubu $2, $[[T6]], $[[T4]]
+; 64-DAG:        d[[m:m]]ult $5, $4
+; 64-DAG:        [[m]]flo $[[T0:[0-9]+]]
+; 64-DAG:        dsubu $2, $6, $[[T0]]

-; 64R6-DAG:      dsll $[[T0:[0-9]+]], $4, 32
-; 64R6-DAG:      dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64R6-DAG:      dsll $[[T2:[0-9]+]], $5, 32
-; 64R6-DAG:      dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64R6-DAG:      dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]]
-; 64R6-DAG:      dsll $[[T5:[0-9]+]], $6, 32
-; 64R6-DAG:      dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64R6-DAG:      dsubu $2, $[[T6]], $[[T4]]
+; 64R6-DAG:      dmul $[[T0:[0-9]+]], $5, $4
+; 64R6-DAG:      dsubu $2, $6, $[[T0]]

-define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone {
+define i64 @msub2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readnone {
 entry:
  %conv = zext i32 %c to i64
  %conv2 = zext i32 %a to i64
--- a/test/CodeGen/Mips/mips64-f128.ll
+++ b/test/CodeGen/Mips/mips64-f128.ll
@ -114,7 +114,7 @@ entry:
 ; ALL-LABEL: conv_LD_UInt:
 ; ALL: ld $25, %call16(__floatunsitf)

-define fp128 @conv_LD_UInt(i32 %a) {
+define fp128 @conv_LD_UInt(i32 signext %a) {
 entry:
  %conv = uitofp i32 %a to fp128
  ret fp128 %conv
@ -635,7 +635,7 @@ entry:
 ; CMP_CC_FMT-DAG: selnez $[[NE2:[0-9]+]], $7, $[[CC]]
 ; CMP_CC_FMT-DAG: or $4, $[[NE2]], $[[EQ2]]

-define fp128 @select_LD(i32 %a, i64, fp128 %b, fp128 %c) {
+define fp128 @select_LD(i32 signext %a, i64, fp128 %b, fp128 %c) {
 entry:
  %tobool = icmp ne i32 %a, 0
  %cond = select i1 %tobool, fp128 %b, fp128 %c
--- a/test/CodeGen/Mips/mips64-sret.ll
+++ b/test/CodeGen/Mips/mips64-sret.ll
@ -11,7 +11,7 @@ entry:
  ret void
 }

-define void @bar(i32 %v, i32* noalias sret %agg.result) nounwind {
+define void @bar(i32 signext %v, i32* noalias sret %agg.result) nounwind {
 entry:
 ; CHECK-LABEL: bar:
 ; CHECK: sw $4, 0($5)
--- a/test/CodeGen/Mips/msa/frameindex.ll
+++ b/test/CodeGen/Mips/msa/frameindex.ll
@ -36,10 +36,10 @@ define void @loadstore_v16i8_just_over_simm10() nounwind {
  %2 = alloca [497 x i8] ; Push the frame just over 512 bytes

  %3 = load volatile <16 x i8>* %1
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512
  ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <16 x i8> %3, <16 x i8>* %1
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512
  ; MIPS32-AE: st.b [[R1]], 0([[BASE]])

  ret void
@ -53,12 +53,12 @@ define void @loadstore_v16i8_just_under_simm16() nounwind {
  %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes

  %3 = load volatile <16 x i8>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <16 x i8> %3, <16 x i8>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: st.b [[R1]], 0([[BASE]])

  ret void
@ -72,12 +72,12 @@ define void @loadstore_v16i8_just_over_simm16() nounwind {
  %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes

  %3 = load volatile <16 x i8>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <16 x i8> %3, <16 x i8>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: st.b [[R1]], 0([[BASE]])

  ret void
@ -107,10 +107,10 @@ define void @loadstore_v8i16_unaligned() nounwind {
  %5 = getelementptr [2 x <8 x i16>]* %4, i32 0, i32 0

  %6 = load volatile <8 x i16>* %5
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
  ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <8 x i16> %6, <8 x i16>* %5
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
  ; MIPS32-AE: st.h [[R1]], 0([[BASE]])

  ret void
@ -139,10 +139,10 @@ define void @loadstore_v8i16_just_over_simm10() nounwind {
  %2 = alloca [1009 x i8] ; Push the frame just over 1024 bytes

  %3 = load volatile <8 x i16>* %1
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024
  ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <8 x i16> %3, <8 x i16>* %1
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024
  ; MIPS32-AE: st.h [[R1]], 0([[BASE]])

  ret void
@ -156,12 +156,12 @@ define void @loadstore_v8i16_just_under_simm16() nounwind {
  %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes

  %3 = load volatile <8 x i16>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <8 x i16> %3, <8 x i16>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: st.h [[R1]], 0([[BASE]])

  ret void
@ -175,12 +175,12 @@ define void @loadstore_v8i16_just_over_simm16() nounwind {
  %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes

  %3 = load volatile <8 x i16>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <8 x i16> %3, <8 x i16>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: st.h [[R1]], 0([[BASE]])

  ret void
@ -210,10 +210,10 @@ define void @loadstore_v4i32_unaligned() nounwind {
  %5 = getelementptr [2 x <4 x i32>]* %4, i32 0, i32 0

  %6 = load volatile <4 x i32>* %5
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
  ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <4 x i32> %6, <4 x i32>* %5
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
  ; MIPS32-AE: st.w [[R1]], 0([[BASE]])

  ret void
@ -242,10 +242,10 @@ define void @loadstore_v4i32_just_over_simm10() nounwind {
  %2 = alloca [2033 x i8] ; Push the frame just over 2048 bytes

  %3 = load volatile <4 x i32>* %1
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048
  ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <4 x i32> %3, <4 x i32>* %1
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048
  ; MIPS32-AE: st.w [[R1]], 0([[BASE]])

  ret void
@ -259,12 +259,12 @@ define void @loadstore_v4i32_just_under_simm16() nounwind {
  %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes

  %3 = load volatile <4 x i32>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <4 x i32> %3, <4 x i32>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: st.w [[R1]], 0([[BASE]])

  ret void
@ -278,12 +278,12 @@ define void @loadstore_v4i32_just_over_simm16() nounwind {
  %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes

  %3 = load volatile <4 x i32>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <4 x i32> %3, <4 x i32>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: st.w [[R1]], 0([[BASE]])

  ret void
@ -313,10 +313,10 @@ define void @loadstore_v2i64_unaligned() nounwind {
  %5 = getelementptr [2 x <2 x i64>]* %4, i32 0, i32 0

  %6 = load volatile <2 x i64>* %5
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
  ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <2 x i64> %6, <2 x i64>* %5
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
  ; MIPS32-AE: st.d [[R1]], 0([[BASE]])

  ret void
@ -345,10 +345,10 @@ define void @loadstore_v2i64_just_over_simm10() nounwind {
  %2 = alloca [4081 x i8] ; Push the frame just over 4096 bytes

  %3 = load volatile <2 x i64>* %1
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096
  ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <2 x i64> %3, <2 x i64>* %1
-  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096
+  ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096
  ; MIPS32-AE: st.d [[R1]], 0([[BASE]])

  ret void
@ -362,12 +362,12 @@ define void @loadstore_v2i64_just_under_simm16() nounwind {
  %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes

  %3 = load volatile <2 x i64>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <2 x i64> %3, <2 x i64>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: st.d [[R1]], 0([[BASE]])

  ret void
@ -381,12 +381,12 @@ define void @loadstore_v2i64_just_over_simm16() nounwind {
  %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes

  %3 = load volatile <2 x i64>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
  store volatile <2 x i64> %3, <2 x i64>* %1
-  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
-  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
  ; MIPS32-AE: st.d [[R1]], 0([[BASE]])

  ret void
--- a/test/CodeGen/Mips/octeon_popcnt.ll
+++ b/test/CodeGen/Mips/octeon_popcnt.ll
@ -6,7 +6,7 @@ define i8 @cnt8(i8 %x) nounwind readnone {
  ret i8 %cnt
 ; OCTEON-LABEL: cnt8:
 ; OCTEON: jr   $ra
-; OCTEON: pop  $2, $1
+; OCTEON: pop  $2, [[R1:\$[0-9]+]]
 ; MIPS64-LABEL: cnt8:
 ; MIPS64-NOT: pop
 }
@ -16,12 +16,12 @@ define i16 @cnt16(i16 %x) nounwind readnone {
  ret i16 %cnt
 ; OCTEON-LABEL: cnt16:
 ; OCTEON: jr   $ra
-; OCTEON: pop  $2, $1
+; OCTEON: pop  $2, [[R1:\$[0-9]+]]
 ; MIPS64-LABEL: cnt16:
 ; MIPS64-NOT: pop
 }

-define i32 @cnt32(i32 %x) nounwind readnone {
+define i32 @cnt32(i32 zeroext %x) nounwind readnone {
  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
  ret i32 %cnt
 ; OCTEON-LABEL: cnt32:
--- a/test/CodeGen/Mips/select.ll
+++ b/test/CodeGen/Mips/select.ll
@ -8,7 +8,7 @@
@d2 = external global double
@d3 = external global double

-define i32 @i32_icmp_ne_i32_val(i32 %s, i32 %f0, i32 %f1) nounwind readnone {
+define i32 @i32_icmp_ne_i32_val(i32 signext %s, i32 signext %f0, i32 signext %f1) nounwind readnone {
 entry:
 ; ALL-LABEL: i32_icmp_ne_i32_val:

@ -37,7 +37,7 @@ entry:
  ret i32 %cond
 }

-define i64 @i32_icmp_ne_i64_val(i32 %s, i64 %f0, i64 %f1) nounwind readnone {
+define i64 @i32_icmp_ne_i64_val(i32 signext %s, i64 %f0, i64 %f1) nounwind readnone {
 entry:
 ; ALL-LABEL: i32_icmp_ne_i64_val:

@ -128,7 +128,7 @@ entry:
  ret i64 %cond
 }

-define float @i32_icmp_ne_f32_val(i32 %s, float %f0, float %f1) nounwind readnone {
+define float @i32_icmp_ne_f32_val(i32 signext %s, float %f0, float %f1) nounwind readnone {
 entry:
 ; ALL-LABEL: i32_icmp_ne_f32_val:

@ -161,7 +161,7 @@ entry:
  ret float %cond
 }

-define double @i32_icmp_ne_f64_val(i32 %s, double %f0, double %f1) nounwind readnone {
+define double @i32_icmp_ne_f64_val(i32 signext %s, double %f0, double %f1) nounwind readnone {
 entry:
 ; ALL-LABEL: i32_icmp_ne_f64_val:

@ -496,7 +496,7 @@ entry:
  ret float %cond
 }

-define i32 @f32_fcmp_oeq_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_oeq_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone {
 entry:
 ; ALL-LABEL: f32_fcmp_oeq_i32_val:

@ -541,7 +541,7 @@ entry:
  ret i32 %cond
 }

-define i32 @f32_fcmp_olt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_olt_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone {
 entry:
 ; ALL-LABEL: f32_fcmp_olt_i32_val:

@ -585,7 +585,7 @@ entry:
  ret i32 %cond
 }

-define i32 @f32_fcmp_ogt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_ogt_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone {
 entry:
 ; ALL-LABEL: f32_fcmp_ogt_i32_val:

@ -630,7 +630,7 @@ entry:
  ret i32 %cond
 }

-define i32 @f64_fcmp_oeq_i32_val(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_oeq_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly {
 entry:
 ; ALL-LABEL: f64_fcmp_oeq_i32_val:

@ -707,7 +707,7 @@ entry:
  ret i32 %cond
 }

-define i32 @f64_fcmp_olt_i32_val(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_olt_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly {
 entry:
 ; ALL-LABEL: f64_fcmp_olt_i32_val:

@ -784,7 +784,7 @@ entry:
  ret i32 %cond
 }

-define i32 @f64_fcmp_ogt_i32_val(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_ogt_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly {
 entry:
 ; ALL-LABEL: f64_fcmp_ogt_i32_val:

--- a/test/CodeGen/Mips/zeroreg.ll
+++ b/test/CodeGen/Mips/zeroreg.ll
@ -8,7 +8,7 @@

@g1 = external global i32

-define i32 @sel_icmp_nez_i32_z0(i32 %s) nounwind readonly {
+define i32 @sel_icmp_nez_i32_z0(i32 signext %s) nounwind readonly {
 entry:
 ; ALL-LABEL: sel_icmp_nez_i32_z0:

@ -30,7 +30,7 @@ entry:
  ret i32 %cond
 }

-define i32 @sel_icmp_nez_i32_z1(i32 %s) nounwind readonly {
+define i32 @sel_icmp_nez_i32_z1(i32 signext %s) nounwind readonly {
 entry:
 ; ALL-LABEL: sel_icmp_nez_i32_z1:

--- a/test/CodeGen/PowerPC/blockaddress.ll
+++ b/test/CodeGen/PowerPC/blockaddress.ll
@ -0,0 +1,26 @@
+; RUN: llc < %s -code-model=small -march=ppc64 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=SMALL
+; RUN: llc < %s -code-model=medium -march=ppc64 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM
+; RUN: llc < %s -code-model=large -march=ppc64 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM
+; RUN: llc < %s -code-model=small -march=ppc64 -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=SMALL
+; RUN: llc < %s -code-model=medium -march=ppc64 -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM
+; RUN: llc < %s -code-model=large -march=ppc64 -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM
+
+define i8* @test() {
+entry:
+  br label %here
+
+here:                                             ; preds = %entry
+; MEDIUM: .Ltmp[[TMP0:[0-9]+]]:
+; MEDIUM: addis [[R0:[0-9]+]], 2, .LC[[LC0:[0-9]+]]@toc@ha
+; MEDIUM: ld 3, .LC[[LC0]]@toc@l([[R0]])
+; MEDIUM: blr
+; MEDIUM: .LC[[LC0]]:
+; MEDIUM: .tc .Ltmp[[TMP0]][TC],.Ltmp[[TMP0]]
+; SMALL: .Ltmp[[TMP0:[0-9]+]]:
+; SMALL: ld 3, .LC[[LC0:[0-9]+]]@toc(2)
+; SMALL: blr
+; SMALL: .LC[[LC0]]:
+; SMALL: .tc .Ltmp[[TMP0]][TC],.Ltmp[[TMP0]]
+  ret i8* blockaddress(@test, %here)
+}
+
--- a/test/CodeGen/PowerPC/cc.ll
+++ b/test/CodeGen/PowerPC/cc.ll
@ -41,7 +41,7 @@ entry:
  br label %foo

 foo:
-  call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cc}" (i64 %a)
+  call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cc},~{cr1},~{cr2},~{cr3},~{cr4},~{cr5},~{cr6},~{cr7}" (i64 %a)
  br i1 %c, label %bar, label %end

 bar:
--- a/test/CodeGen/PowerPC/fast-isel-conversion.ll
+++ b/test/CodeGen/PowerPC/fast-isel-conversion.ll
@ -1,4 +1,5 @@
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s --check-prefix=ELF64LE
 ; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970

 ;; Tests for 970 don't use -fast-isel-abort because we intentionally punt
@ -9,12 +10,16 @@
 define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_single_i64
+; ELF64LE: sitofp_single_i64
 ; PPC970: sitofp_single_i64
  %b.addr = alloca float, align 4
  %conv = sitofp i64 %a to float
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfids
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfids
 ; PPC970: std
 ; PPC970: lfd
 ; PPC970: fcfid
@ -26,12 +31,20 @@ entry:
 define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_single_i32
+; ELF64LE: sitofp_single_i32
 ; PPC970: sitofp_single_i32
  %b.addr = alloca float, align 4
  %conv = sitofp i32 %a to float
 ; ELF64: std
+; ori immediate is the stack offset of the integer word loaded by lfiwax, as an unsigned 16-bit value: 65524 = -16 + 4 (mod 65536)
+; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524 
 ; ELF64: lfiwax
 ; ELF64: fcfids
+; ELF64LE: std
+; ori immediate is the stack offset of the integer word loaded by lfiwax, as an unsigned 16-bit value: 65520 = -16 + 0 (mod 65536)
+; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
+; ELF64LE: lfiwax
+; ELF64LE: fcfids
 ; PPC970: std
 ; PPC970: lfd
 ; PPC970: fcfid
@ -43,6 +56,7 @@ entry:
 define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_single_i16
+; ELF64LE: sitofp_single_i16
 ; PPC970: sitofp_single_i16
  %b.addr = alloca float, align 4
  %conv = sitofp i16 %a to float
@ -50,6 +64,10 @@ entry:
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfids
+; ELF64LE: extsh
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfids
 ; PPC970: extsh
 ; PPC970: std
 ; PPC970: lfd
@ -62,6 +80,7 @@ entry:
 define void @sitofp_single_i8(i8 %a) nounwind ssp {
 entry:
 ; ELF64: sitofp_single_i8
+; ELF64LE: sitofp_single_i8
 ; PPC970: sitofp_single_i8
  %b.addr = alloca float, align 4
  %conv = sitofp i8 %a to float
@ -69,6 +88,10 @@ entry:
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfids
+; ELF64LE: extsb
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfids
 ; PPC970: extsb
 ; PPC970: std
 ; PPC970: lfd
@ -81,12 +104,20 @@ entry:
 define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_double_i32
+; ELF64LE: sitofp_double_i32
 ; PPC970: sitofp_double_i32
  %b.addr = alloca double, align 8
  %conv = sitofp i32 %a to double
 ; ELF64: std
+; ori immediate is the stack offset of the integer word loaded by lfiwax, as an unsigned 16-bit value: 65524 = -16 + 4 (mod 65536)
+; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
 ; ELF64: lfiwax
 ; ELF64: fcfid
+; ELF64LE: std
+; ori immediate is the stack offset of the integer word loaded by lfiwax, as an unsigned 16-bit value: 65520 = -16 + 0 (mod 65536)
+; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
+; ELF64LE: lfiwax
+; ELF64LE: fcfid
 ; PPC970: std
 ; PPC970: lfd
 ; PPC970: fcfid
@ -97,12 +128,16 @@ entry:
 define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_double_i64
+; ELF64LE: sitofp_double_i64
 ; PPC970: sitofp_double_i64
  %b.addr = alloca double, align 8
  %conv = sitofp i64 %a to double
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfid
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfid
 ; PPC970: std
 ; PPC970: lfd
 ; PPC970: fcfid
@ -113,6 +148,7 @@ entry:
 define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_double_i16
+; ELF64LE: sitofp_double_i16
 ; PPC970: sitofp_double_i16
  %b.addr = alloca double, align 8
  %conv = sitofp i16 %a to double
@ -120,6 +156,10 @@ entry:
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfid
+; ELF64LE: extsh
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfid
 ; PPC970: extsh
 ; PPC970: std
 ; PPC970: lfd
@ -131,6 +171,7 @@ entry:
 define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: sitofp_double_i8
+; ELF64LE: sitofp_double_i8
 ; PPC970: sitofp_double_i8
  %b.addr = alloca double, align 8
  %conv = sitofp i8 %a to double
@ -138,6 +179,10 @@ entry:
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfid
+; ELF64LE: extsb
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfid
 ; PPC970: extsb
 ; PPC970: std
 ; PPC970: lfd
@ -151,12 +196,16 @@ entry:
 define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_single_i64
+; ELF64LE: uitofp_single_i64
 ; PPC970: uitofp_single_i64
  %b.addr = alloca float, align 4
  %conv = uitofp i64 %a to float
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfidus
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidus
 ; PPC970-NOT: fcfidus
  store float %conv, float* %b.addr, align 4
  ret void
@ -165,12 +214,20 @@ entry:
 define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_single_i32
+; ELF64LE: uitofp_single_i32
 ; PPC970: uitofp_single_i32
  %b.addr = alloca float, align 4
  %conv = uitofp i32 %a to float
 ; ELF64: std
+; ori immediate is the stack offset of the integer word loaded by lfiwzx, as an unsigned 16-bit value: 65524 = -16 + 4 (mod 65536)
+; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
 ; ELF64: lfiwzx
 ; ELF64: fcfidus
+; ELF64LE: std
+; ori immediate is the stack offset of the integer word loaded by lfiwzx, as an unsigned 16-bit value: 65520 = -16 + 0 (mod 65536)
+; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
+; ELF64LE: lfiwzx
+; ELF64LE: fcfidus
 ; PPC970-NOT: lfiwzx
 ; PPC970-NOT: fcfidus
  store float %conv, float* %b.addr, align 4
@ -180,6 +237,7 @@ entry:
 define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_single_i16
+; ELF64LE: uitofp_single_i16
 ; PPC970: uitofp_single_i16
  %b.addr = alloca float, align 4
  %conv = uitofp i16 %a to float
@ -187,6 +245,10 @@ entry:
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfidus
+; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidus
 ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
 ; PPC970: std
 ; PPC970: lfd
@ -199,6 +261,7 @@ entry:
 define void @uitofp_single_i8(i8 %a) nounwind ssp {
 entry:
 ; ELF64: uitofp_single_i8
+; ELF64LE: uitofp_single_i8
 ; PPC970: uitofp_single_i8
  %b.addr = alloca float, align 4
  %conv = uitofp i8 %a to float
@ -206,6 +269,10 @@ entry:
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfidus
+; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidus
 ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
 ; PPC970: std
 ; PPC970: lfd
@ -218,12 +285,16 @@ entry:
 define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_double_i64
+; ELF64LE: uitofp_double_i64
 ; PPC970: uitofp_double_i64
  %b.addr = alloca double, align 8
  %conv = uitofp i64 %a to double
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfidu
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidu
 ; PPC970-NOT: fcfidu
  store double %conv, double* %b.addr, align 8
  ret void
@ -232,12 +303,20 @@ entry:
 define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_double_i32
+; ELF64LE: uitofp_double_i32
 ; PPC970: uitofp_double_i32
  %b.addr = alloca double, align 8
  %conv = uitofp i32 %a to double
 ; ELF64: std
+; ori immediate is the stack offset of the integer word loaded by lfiwzx, as an unsigned 16-bit value: 65524 = -16 + 4 (mod 65536)
+; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
 ; ELF64: lfiwzx
 ; ELF64: fcfidu
+; ELF64LE: std
+; ori immediate is the stack offset of the integer word loaded by lfiwzx, as an unsigned 16-bit value: 65520 = -16 + 0 (mod 65536)
+; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
+; ELF64LE: lfiwzx
+; ELF64LE: fcfidu
 ; PPC970-NOT: lfiwzx
 ; PPC970-NOT: fcfidu
  store double %conv, double* %b.addr, align 8
@ -247,6 +326,7 @@ entry:
 define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_double_i16
+; ELF64LE: uitofp_double_i16
 ; PPC970: uitofp_double_i16
  %b.addr = alloca double, align 8
  %conv = uitofp i16 %a to double
@ -254,6 +334,10 @@ entry:
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfidu
+; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidu
 ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
 ; PPC970: std
 ; PPC970: lfd
@ -265,6 +349,7 @@ entry:
 define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
 entry:
 ; ELF64: uitofp_double_i8
+; ELF64LE: uitofp_double_i8
 ; PPC970: uitofp_double_i8
  %b.addr = alloca double, align 8
  %conv = uitofp i8 %a to double
@ -272,6 +357,10 @@ entry:
 ; ELF64: std
 ; ELF64: lfd
 ; ELF64: fcfidu
+; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidu
 ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
 ; PPC970: std
 ; PPC970: lfd
@ -285,12 +374,16 @@ entry:
 define void @fptosi_float_i32(float %a) nounwind ssp {
 entry:
 ; ELF64: fptosi_float_i32
+; ELF64LE: fptosi_float_i32
 ; PPC970: fptosi_float_i32
  %b.addr = alloca i32, align 4
  %conv = fptosi float %a to i32
 ; ELF64: fctiwz
 ; ELF64: stfd
 ; ELF64: lwa
+; ELF64LE: fctiwz
+; ELF64LE: stfd
+; ELF64LE: lwa
 ; PPC970: fctiwz
 ; PPC970: stfd
 ; PPC970: lwa
@ -301,12 +394,16 @@ entry:
 define void @fptosi_float_i64(float %a) nounwind ssp {
 entry:
 ; ELF64: fptosi_float_i64
+; ELF64LE: fptosi_float_i64
 ; PPC970: fptosi_float_i64
  %b.addr = alloca i64, align 4
  %conv = fptosi float %a to i64
 ; ELF64: fctidz
 ; ELF64: stfd
 ; ELF64: ld
+; ELF64LE: fctidz
+; ELF64LE: stfd
+; ELF64LE: ld
 ; PPC970: fctidz
 ; PPC970: stfd
 ; PPC970: ld
@ -317,12 +414,16 @@ entry:
 define void @fptosi_double_i32(double %a) nounwind ssp {
 entry:
 ; ELF64: fptosi_double_i32
+; ELF64LE: fptosi_double_i32
 ; PPC970: fptosi_double_i32
  %b.addr = alloca i32, align 8
  %conv = fptosi double %a to i32
 ; ELF64: fctiwz
 ; ELF64: stfd
 ; ELF64: lwa
+; ELF64LE: fctiwz
+; ELF64LE: stfd
+; ELF64LE: lwa
 ; PPC970: fctiwz
 ; PPC970: stfd
 ; PPC970: lwa
@ -333,12 +434,16 @@ entry:
 define void @fptosi_double_i64(double %a) nounwind ssp {
 entry:
 ; ELF64: fptosi_double_i64
+; ELF64LE: fptosi_double_i64
 ; PPC970: fptosi_double_i64
  %b.addr = alloca i64, align 8
  %conv = fptosi double %a to i64
 ; ELF64: fctidz
 ; ELF64: stfd
 ; ELF64: ld
+; ELF64LE: fctidz
+; ELF64LE: stfd
+; ELF64LE: ld
 ; PPC970: fctidz
 ; PPC970: stfd
 ; PPC970: ld
@ -351,12 +456,16 @@ entry:
 define void @fptoui_float_i32(float %a) nounwind ssp {
 entry:
 ; ELF64: fptoui_float_i32
+; ELF64LE: fptoui_float_i32
 ; PPC970: fptoui_float_i32
  %b.addr = alloca i32, align 4
  %conv = fptoui float %a to i32
 ; ELF64: fctiwuz
 ; ELF64: stfd
 ; ELF64: lwz
+; ELF64LE: fctiwuz
+; ELF64LE: stfd
+; ELF64LE: lwz
 ; PPC970: fctidz
 ; PPC970: stfd
 ; PPC970: lwz
@ -367,12 +476,16 @@ entry:
 define void @fptoui_float_i64(float %a) nounwind ssp {
 entry:
 ; ELF64: fptoui_float_i64
+; ELF64LE: fptoui_float_i64
 ; PPC970: fptoui_float_i64
  %b.addr = alloca i64, align 4
  %conv = fptoui float %a to i64
 ; ELF64: fctiduz
 ; ELF64: stfd
 ; ELF64: ld
+; ELF64LE: fctiduz
+; ELF64LE: stfd
+; ELF64LE: ld
 ; PPC970-NOT: fctiduz
  store i64 %conv, i64* %b.addr, align 4
  ret void
@ -381,12 +494,16 @@ entry:
 define void @fptoui_double_i32(double %a) nounwind ssp {
 entry:
 ; ELF64: fptoui_double_i32
+; ELF64LE: fptoui_double_i32
 ; PPC970: fptoui_double_i32
  %b.addr = alloca i32, align 8
  %conv = fptoui double %a to i32
 ; ELF64: fctiwuz
 ; ELF64: stfd
 ; ELF64: lwz
+; ELF64LE: fctiwuz
+; ELF64LE: stfd
+; ELF64LE: lwz
 ; PPC970: fctidz
 ; PPC970: stfd
 ; PPC970: lwz
@ -397,12 +514,16 @@ entry:
 define void @fptoui_double_i64(double %a) nounwind ssp {
 entry:
 ; ELF64: fptoui_double_i64
+; ELF64LE: fptoui_double_i64
 ; PPC970: fptoui_double_i64
  %b.addr = alloca i64, align 8
  %conv = fptoui double %a to i64
 ; ELF64: fctiduz
 ; ELF64: stfd
 ; ELF64: ld
+; ELF64LE: fctiduz
+; ELF64LE: stfd
+; ELF64LE: ld
 ; PPC970-NOT: fctiduz
  store i64 %conv, i64* %b.addr, align 8
  ret void
--- a/test/CodeGen/PowerPC/fast-isel-ret.ll
+++ b/test/CodeGen/PowerPC/fast-isel-ret.ll
@ -1,8 +1,40 @@
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64

+define zeroext i1 @rettrue() nounwind uwtable ssp {  ; returns constant i1 true with a zeroext return attribute
+entry:
+; ELF64-LABEL: rettrue
+; ELF64: li 3, 1
+; ELF64: blr
+  ret i1 true  ; the directives above expect this to be materialized as 1 in register 3
+}
+
+define zeroext i1 @retfalse() nounwind uwtable ssp {  ; returns constant i1 false with a zeroext return attribute
+entry:
+; ELF64-LABEL: retfalse
+; ELF64: li 3, 0
+; ELF64: blr
+  ret i1 false  ; the directives above expect this to be materialized as 0 in register 3
+}
+
+define signext i1 @retstrue() nounwind uwtable ssp {  ; returns constant i1 true with a signext return attribute
+entry:
+; ELF64-LABEL: retstrue
+; ELF64: li 3, -1
+; ELF64: blr
+  ret i1 true  ; sign-extended, so the directives above expect -1 (not 1) in register 3
+}
+
+define signext i1 @retsfalse() nounwind uwtable ssp {  ; returns constant i1 false with a signext return attribute
+entry:
+; ELF64-LABEL: retsfalse
+; ELF64: li 3, 0
+; ELF64: blr
+  ret i1 false  ; the directives above expect this to be materialized as 0 in register 3
+}
+
 define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp {
 entry:
-; ELF64: ret2
+; ELF64-LABEL: ret2
 ; ELF64: extsb
 ; ELF64: blr
  ret i8 %a
@ -10,7 +42,7 @@ entry:

 define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp {
 entry:
-; ELF64: ret3
+; ELF64-LABEL: ret3
 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
 ; ELF64: blr
  ret i8 %a
@ -18,7 +50,7 @@ entry:

 define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp {
 entry:
-; ELF64: ret4
+; ELF64-LABEL: ret4
 ; ELF64: extsh
 ; ELF64: blr
  ret i16 %a
@ -26,7 +58,7 @@ entry:

 define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp {
 entry:
-; ELF64: ret5
+; ELF64-LABEL: ret5
 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
 ; ELF64: blr
  ret i16 %a
@ -34,7 +66,7 @@ entry:

 define i16 @ret6(i16 %a) nounwind uwtable ssp {
 entry:
-; ELF64: ret6
+; ELF64-LABEL: ret6
 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
 ; ELF64: blr
  ret i16 %a
@ -42,7 +74,7 @@ entry:

 define signext i32 @ret7(i32 signext %a) nounwind uwtable ssp {
 entry:
-; ELF64: ret7
+; ELF64-LABEL: ret7
 ; ELF64: extsw
 ; ELF64: blr
  ret i32 %a
@ -50,7 +82,7 @@ entry:

 define zeroext i32 @ret8(i32 signext %a) nounwind uwtable ssp {
 entry:
-; ELF64: ret8
+; ELF64-LABEL: ret8
 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
 ; ELF64: blr
  ret i32 %a
@ -58,7 +90,7 @@ entry:

 define i32 @ret9(i32 %a) nounwind uwtable ssp {
 entry:
-; ELF64: ret9
+; ELF64-LABEL: ret9
 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
 ; ELF64: blr
  ret i32 %a
@ -66,7 +98,7 @@ entry:

 define i64 @ret10(i64 %a) nounwind uwtable ssp {
 entry:
-; ELF64: ret10
+; ELF64-LABEL: ret10
 ; ELF64-NOT: exts
 ; ELF64-NOT: rldicl
 ; ELF64: blr
@ -75,21 +107,21 @@ entry:

 define float @ret11(float %a) nounwind uwtable ssp {
 entry:
-; ELF64: ret11
+; ELF64-LABEL: ret11
 ; ELF64: blr
  ret float %a
 }

 define double @ret12(double %a) nounwind uwtable ssp {
 entry:
-; ELF64: ret12
+; ELF64-LABEL: ret12
 ; ELF64: blr
  ret double %a
 }

 define i8 @ret13() nounwind uwtable ssp {
 entry:
-; ELF64: ret13
+; ELF64-LABEL: ret13
 ; ELF64: li
 ; ELF64: blr
  ret i8 15;
@ -97,7 +129,7 @@ entry:

 define i16 @ret14() nounwind uwtable ssp {
 entry:
-; ELF64: ret14
+; ELF64-LABEL: ret14
 ; ELF64: li
 ; ELF64: blr
  ret i16 -225;
@ -105,7 +137,7 @@ entry:

 define i32 @ret15() nounwind uwtable ssp {
 entry:
-; ELF64: ret15
+; ELF64-LABEL: ret15
 ; ELF64: lis
 ; ELF64: ori
 ; ELF64: blr
@ -114,7 +146,7 @@ entry:

 define i64 @ret16() nounwind uwtable ssp {
 entry:
-; ELF64: ret16
+; ELF64-LABEL: ret16
 ; ELF64: li
 ; ELF64: sldi
 ; ELF64: oris
@ -125,7 +157,7 @@ entry:

 define float @ret17() nounwind uwtable ssp {
 entry:
-; ELF64: ret17
+; ELF64-LABEL: ret17
 ; ELF64: addis
 ; ELF64: lfs
 ; ELF64: blr
@ -134,7 +166,7 @@ entry:

 define double @ret18() nounwind uwtable ssp {
 entry:
-; ELF64: ret18
+; ELF64-LABEL: ret18
 ; ELF64: addis
 ; ELF64: lfd
 ; ELF64: blr
--- a/test/CodeGen/PowerPC/ia-mem-r0.ll
+++ b/test/CodeGen/PowerPC/ia-mem-r0.ll
@ -0,0 +1,94 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Make sure that we don't generate a std r, 0(0): in a D-form address a base of 0
+; means the literal value zero, not r0, so r0 cannot hold the memory address.
+; CHECK-LABEL: @test1
+; CHECK-NOT: std {{[0-9]+}}, 0(0) 
+; CHECK: blr
+
+define void @test1({ i8*, void (i8*, i8*)* } %fn_arg) {  ; spills %fn_arg, stores registers 14..31 via inline asm, then calls through %fn_arg
+  %fn = alloca { i8*, void (i8*, i8*)* }  ; stack copy of the by-value { pointer, function pointer } argument
+  %sp = alloca i8*, align 8  ; never written in this function; only loaded for the final call
+  %regs = alloca [18 x i64], align 8  ; 18 eight-byte slots, one per "std N, $0" asm below (N = 14..31)
+  store { i8*, void (i8*, i8*)* } %fn_arg, { i8*, void (i8*, i8*)* }* %fn
+  %1 = bitcast [18 x i64]* %regs to i64*
+  call void asm sideeffect "std  14, $0", "=*m"(i64* %1)  ; slot at byte offset 0; "=*m" passes the slot as an indirect memory output
+  %2 = bitcast [18 x i64]* %regs to i8*
+  %3 = getelementptr i8* %2, i32 8  ; byte offset 8; the same bitcast/gep/asm triple repeats in steps of 8 up to 136
+  %4 = bitcast i8* %3 to i64*
+  call void asm sideeffect "std  15, $0", "=*m"(i64* %4)
+  %5 = bitcast [18 x i64]* %regs to i8*
+  %6 = getelementptr i8* %5, i32 16
+  %7 = bitcast i8* %6 to i64*
+  call void asm sideeffect "std  16, $0", "=*m"(i64* %7)
+  %8 = bitcast [18 x i64]* %regs to i8*
+  %9 = getelementptr i8* %8, i32 24
+  %10 = bitcast i8* %9 to i64*
+  call void asm sideeffect "std  17, $0", "=*m"(i64* %10)
+  %11 = bitcast [18 x i64]* %regs to i8*
+  %12 = getelementptr i8* %11, i32 32
+  %13 = bitcast i8* %12 to i64*
+  call void asm sideeffect "std  18, $0", "=*m"(i64* %13)
+  %14 = bitcast [18 x i64]* %regs to i8*
+  %15 = getelementptr i8* %14, i32 40
+  %16 = bitcast i8* %15 to i64*
+  call void asm sideeffect "std  19, $0", "=*m"(i64* %16)
+  %17 = bitcast [18 x i64]* %regs to i8*
+  %18 = getelementptr i8* %17, i32 48
+  %19 = bitcast i8* %18 to i64*
+  call void asm sideeffect "std  20, $0", "=*m"(i64* %19)
+  %20 = bitcast [18 x i64]* %regs to i8*
+  %21 = getelementptr i8* %20, i32 56
+  %22 = bitcast i8* %21 to i64*
+  call void asm sideeffect "std  21, $0", "=*m"(i64* %22)
+  %23 = bitcast [18 x i64]* %regs to i8*
+  %24 = getelementptr i8* %23, i32 64
+  %25 = bitcast i8* %24 to i64*
+  call void asm sideeffect "std  22, $0", "=*m"(i64* %25)
+  %26 = bitcast [18 x i64]* %regs to i8*
+  %27 = getelementptr i8* %26, i32 72
+  %28 = bitcast i8* %27 to i64*
+  call void asm sideeffect "std  23, $0", "=*m"(i64* %28)
+  %29 = bitcast [18 x i64]* %regs to i8*
+  %30 = getelementptr i8* %29, i32 80
+  %31 = bitcast i8* %30 to i64*
+  call void asm sideeffect "std  24, $0", "=*m"(i64* %31)
+  %32 = bitcast [18 x i64]* %regs to i8*
+  %33 = getelementptr i8* %32, i32 88
+  %34 = bitcast i8* %33 to i64*
+  call void asm sideeffect "std  25, $0", "=*m"(i64* %34)
+  %35 = bitcast [18 x i64]* %regs to i8*
+  %36 = getelementptr i8* %35, i32 96
+  %37 = bitcast i8* %36 to i64*
+  call void asm sideeffect "std  26, $0", "=*m"(i64* %37)
+  %38 = bitcast [18 x i64]* %regs to i8*
+  %39 = getelementptr i8* %38, i32 104
+  %40 = bitcast i8* %39 to i64*
+  call void asm sideeffect "std  27, $0", "=*m"(i64* %40)
+  %41 = bitcast [18 x i64]* %regs to i8*
+  %42 = getelementptr i8* %41, i32 112
+  %43 = bitcast i8* %42 to i64*
+  call void asm sideeffect "std  28, $0", "=*m"(i64* %43)
+  %44 = bitcast [18 x i64]* %regs to i8*
+  %45 = getelementptr i8* %44, i32 120
+  %46 = bitcast i8* %45 to i64*
+  call void asm sideeffect "std  29, $0", "=*m"(i64* %46)
+  %47 = bitcast [18 x i64]* %regs to i8*
+  %48 = getelementptr i8* %47, i32 128
+  %49 = bitcast i8* %48 to i64*
+  call void asm sideeffect "std  30, $0", "=*m"(i64* %49)
+  %50 = bitcast [18 x i64]* %regs to i8*
+  %51 = getelementptr i8* %50, i32 136  ; last slot: 17 * 8 bytes
+  %52 = bitcast i8* %51 to i64*
+  call void asm sideeffect "std  31, $0", "=*m"(i64* %52)
+  %53 = getelementptr { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 1  ; field 1 of %fn: the function pointer
+  %.funcptr = load void (i8*, i8*)** %53
+  %54 = getelementptr { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 0  ; field 0 of %fn: the i8* passed as first argument
+  %.ptr = load i8** %54
+  %55 = load i8** %sp  ; %sp has no prior store, so this value is whatever the slot contains
+  call void %.funcptr(i8* %.ptr, i8* %55)  ; indirect call through the loaded function pointer
+  ret void
+}
+
--- a/test/CodeGen/PowerPC/ia-neg-const.ll
+++ b/test/CodeGen/PowerPC/ia-neg-const.ll
@ -0,0 +1,25 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00", align 1
+
+; Function Attrs: nounwind
+define i64 @main() #0 {  ; loads a zero-initialized i64 in inline asm and adds the constant -1 to it
+entry:
+  %x = alloca i64, align 8  ; passed to the asm as the indirect memory input ("*m", operand $1)
+  store i64 0, i64* %x, align 8  ; x starts at 0, so the asm computes 0 + (-1)
+  %0 = call i64 asm sideeffect "ld       $0,$1\0A\09add${2:I}   $0,$0,$2", "=&r,*m,Ir"(i64* %x, i64 -1) #0
+  ret i64 %0  ; returns the asm's output operand $0; the directives below expect the -1 to print as -1, not 4294967295
+}
+
+; CHECK: ld
+; CHECK-NOT: addi   3,3,4294967295
+; CHECK: addi   3,3,-1
+; CHECK: blr
+
+; Function Attrs: nounwind
+declare signext i32 @printf(i8* nocapture readonly, ...) #0
+
+attributes #0 = { nounwind }
+
--- a/test/CodeGen/PowerPC/stack-realign.ll
+++ b/test/CodeGen/PowerPC/stack-realign.ll
@ -37,6 +37,7 @@ entry:
 ; CHECK-DAG: subfic 0, [[REG]], -160
 ; CHECK: stdux 1, 1, 0

+; CHECK: .cfi_def_cfa_register r30
 ; CHECK: .cfi_offset r30, -16
 ; CHECK: .cfi_offset lr, 16

@ -59,6 +60,7 @@ entry:
 ; CHECK-FP-DAG: subfic 0, [[REG]], -160
 ; CHECK-FP: stdux 1, 1, 0

+; CHECK-FP: .cfi_def_cfa_register r30
 ; CHECK-FP: .cfi_offset r31, -8
 ; CHECK-FP: .cfi_offset r30, -16
 ; CHECK-FP: .cfi_offset lr, 16
@ -120,6 +122,8 @@ entry:
 ; CHECK-DAG: subfc 0, [[REG3]], [[REG2]]
 ; CHECK: stdux 1, 1, 0

+; CHECK: .cfi_def_cfa_register r30
+
 ; CHECK: blr

 ; CHECK-32-LABEL: @hoo
@ -178,6 +182,8 @@ entry:
 ; CHECK-DAG: subfic 0, [[REG]], -192
 ; CHECK: stdux 1, 1, 0

+; CHECK: .cfi_def_cfa_register r30
+
 ; CHECK: stfd 30, -16(30)

 ; CHECK: blr
@ -193,6 +199,8 @@ entry:
 ; CHECK-FP-DAG: subfic 0, [[REG]], -192
 ; CHECK-FP: stdux 1, 1, 0

+; CHECK-FP: .cfi_def_cfa_register r30
+
 ; CHECK-FP: stfd 30, -16(30)

 ; CHECK-FP: blr
--- a/test/CodeGen/PowerPC/subreg-postra-2.ll
+++ b/test/CodeGen/PowerPC/subreg-postra-2.ll
@ -0,0 +1,175 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @jbd2_journal_commit_transaction() #0 {  ; control-flow skeleton branching mostly on undef; the instructions of interest are in wait_on_buffer.exit1319
+entry:
+  br i1 undef, label %do.body, label %if.then5
+
+if.then5:                                         ; preds = %entry
+  unreachable
+
+do.body:                                          ; preds = %entry
+  br i1 undef, label %do.body.i, label %trace_jbd2_start_commit.exit
+
+do.body.i:                                        ; preds = %do.body
+  unreachable
+
+trace_jbd2_start_commit.exit:                     ; preds = %do.body
+  br i1 undef, label %do.body.i1116, label %trace_jbd2_commit_locking.exit
+
+do.body.i1116:                                    ; preds = %trace_jbd2_start_commit.exit
+  unreachable
+
+trace_jbd2_commit_locking.exit:                   ; preds = %trace_jbd2_start_commit.exit
+  br i1 undef, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:                                 ; preds = %trace_jbd2_commit_locking.exit
+  unreachable
+
+while.end:                                        ; preds = %trace_jbd2_commit_locking.exit
+  br i1 undef, label %spin_unlock.exit1146, label %if.then.i.i.i.i1144
+
+if.then.i.i.i.i1144:                              ; preds = %while.end
+  unreachable
+
+spin_unlock.exit1146:                             ; preds = %while.end
+  br i1 undef, label %spin_unlock.exit1154, label %if.then.i.i.i.i1152
+
+if.then.i.i.i.i1152:                              ; preds = %spin_unlock.exit1146
+  unreachable
+
+spin_unlock.exit1154:                             ; preds = %spin_unlock.exit1146
+  br i1 undef, label %do.body.i1159, label %trace_jbd2_commit_flushing.exit
+
+do.body.i1159:                                    ; preds = %spin_unlock.exit1154
+  br i1 undef, label %if.end.i1166, label %do.body5.i1165
+
+do.body5.i1165:                                   ; preds = %do.body.i1159
+  unreachable
+
+if.end.i1166:                                     ; preds = %do.body.i1159
+  unreachable
+
+trace_jbd2_commit_flushing.exit:                  ; preds = %spin_unlock.exit1154
+  br i1 undef, label %for.end.i, label %for.body.lr.ph.i
+
+for.body.lr.ph.i:                                 ; preds = %trace_jbd2_commit_flushing.exit
+  unreachable
+
+for.end.i:                                        ; preds = %trace_jbd2_commit_flushing.exit
+  br i1 undef, label %journal_submit_data_buffers.exit, label %if.then.i.i.i.i31.i
+
+if.then.i.i.i.i31.i:                              ; preds = %for.end.i
+  br label %journal_submit_data_buffers.exit
+
+journal_submit_data_buffers.exit:                 ; preds = %if.then.i.i.i.i31.i, %for.end.i
+  br i1 undef, label %if.end103, label %if.then102
+
+if.then102:                                       ; preds = %journal_submit_data_buffers.exit
+  unreachable
+
+if.end103:                                        ; preds = %journal_submit_data_buffers.exit
+  br i1 undef, label %do.body.i1182, label %trace_jbd2_commit_logging.exit
+
+do.body.i1182:                                    ; preds = %if.end103
+  br i1 undef, label %if.end.i1189, label %do.body5.i1188
+
+do.body5.i1188:                                   ; preds = %do.body5.i1188, %do.body.i1182
+  br i1 undef, label %if.end.i1189, label %do.body5.i1188
+
+if.end.i1189:                                     ; preds = %do.body5.i1188, %do.body.i1182
+  unreachable
+
+trace_jbd2_commit_logging.exit:                   ; preds = %if.end103
+  br label %while.cond129.outer1451
+
+while.cond129.outer1451:                          ; preds = %start_journal_io, %trace_jbd2_commit_logging.exit
+  br label %while.cond129
+
+while.cond129:                                    ; preds = %if.then135, %while.cond129.outer1451
+  br i1 undef, label %while.end246, label %if.then135
+
+if.then135:                                       ; preds = %while.cond129
+  br i1 undef, label %start_journal_io, label %while.cond129
+
+start_journal_io:                                 ; preds = %if.then135
+  br label %while.cond129.outer1451
+
+while.end246:                                     ; preds = %while.cond129
+  br i1 undef, label %for.end.i1287, label %for.body.i1277
+
+for.body.i1277:                                   ; preds = %while.end246
+  unreachable
+
+for.end.i1287:                                    ; preds = %while.end246
+  br i1 undef, label %journal_finish_inode_data_buffers.exit, label %if.then.i.i.i.i84.i
+
+if.then.i.i.i.i84.i:                              ; preds = %for.end.i1287
+  unreachable
+
+journal_finish_inode_data_buffers.exit:           ; preds = %for.end.i1287
+  br i1 undef, label %if.end256, label %if.then249
+
+if.then249:                                       ; preds = %journal_finish_inode_data_buffers.exit
+  unreachable
+
+if.end256:                                        ; preds = %journal_finish_inode_data_buffers.exit
+  br label %while.body318
+
+while.body318:                                    ; preds = %wait_on_buffer.exit, %if.end256
+  br i1 undef, label %wait_on_buffer.exit, label %if.then.i1296
+
+if.then.i1296:                                    ; preds = %while.body318
+  br label %wait_on_buffer.exit
+
+wait_on_buffer.exit:                              ; preds = %if.then.i1296, %while.body318
+  br i1 undef, label %do.body378, label %while.body318
+
+do.body378:                                       ; preds = %wait_on_buffer.exit
+  br i1 undef, label %while.end418, label %while.body392.lr.ph
+
+while.body392.lr.ph:                              ; preds = %do.body378
+  br label %while.body392
+
+while.body392:                                    ; preds = %wait_on_buffer.exit1319, %while.body392.lr.ph
+  %0 = load i8** undef, align 8
+  %add.ptr399 = getelementptr inbounds i8* %0, i64 -72
+  %b_state.i.i1314 = bitcast i8* %add.ptr399 to i64*  ; i64 word 72 bytes before %0; read below and passed to the asm
+  %tobool.i1316 = icmp eq i64 undef, 0
+  br i1 %tobool.i1316, label %wait_on_buffer.exit1319, label %if.then.i1317
+
+if.then.i1317:                                    ; preds = %while.body392
+  unreachable
+
+wait_on_buffer.exit1319:                          ; preds = %while.body392
+  %1 = load volatile i64* %b_state.i.i1314, align 8
+  %conv.i.i1322 = and i64 %1, 1  ; isolate bit 0 of the loaded word
+  %lnot404 = icmp eq i64 %conv.i.i1322, 0  ; true when bit 0 is clear
+  %.err.4 = select i1 %lnot404, i32 -5, i32 undef  ; computed before the cc-clobbering asm below, consumed after it by the phi in while.end418
+  %2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. $0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* %b_state.i.i1314) #0
+  store i8* %0, i8** undef, align 8
+  %cmp.i1312 = icmp eq i32* undef, undef
+  br i1 %cmp.i1312, label %while.end418, label %while.body392
+
+while.end418:                                     ; preds = %wait_on_buffer.exit1319, %do.body378
+  %err.4.lcssa = phi i32 [ undef, %do.body378 ], [ %.err.4, %wait_on_buffer.exit1319 ]
+  %tobool419 = icmp eq i32 %err.4.lcssa, 0  ; keeps the selected value live after the loop
+  br i1 %tobool419, label %if.end421, label %if.then420
+
+; CHECK-LABEL: @jbd2_journal_commit_transaction
+; CHECK: andi.
+; CHECK: cror [[REG:[0-9]+]], 1, 1
+; CHECK: stdcx.
+; CHECK: isel {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, [[REG]]
+
+if.then420:                                       ; preds = %while.end418
+  unreachable
+
+if.end421:                                        ; preds = %while.end418
+  unreachable
+}
+
+attributes #0 = { nounwind }
+
--- a/test/CodeGen/PowerPC/subreg-postra.ll
+++ b/test/CodeGen/PowerPC/subreg-postra.ll
@ -0,0 +1,168 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @jbd2_journal_commit_transaction(i32* %journal) #0 {  ; control-flow skeleton branching mostly on undef; the instructions of interest are in wait_on_buffer.exit1319
+entry:
+  br i1 undef, label %do.body, label %if.then5
+
+if.then5:                                         ; preds = %entry
+  unreachable
+
+do.body:                                          ; preds = %entry
+  br i1 undef, label %do.body.i, label %trace_jbd2_start_commit.exit
+
+do.body.i:                                        ; preds = %do.body
+  unreachable
+
+trace_jbd2_start_commit.exit:                     ; preds = %do.body
+  br i1 undef, label %do.body.i1116, label %trace_jbd2_commit_locking.exit
+
+do.body.i1116:                                    ; preds = %trace_jbd2_start_commit.exit
+  br i1 undef, label %if.end.i1123, label %do.body5.i1122
+
+do.body5.i1122:                                   ; preds = %do.body.i1116
+  unreachable
+
+if.end.i1123:                                     ; preds = %do.body.i1116
+  br label %trace_jbd2_commit_locking.exit
+
+trace_jbd2_commit_locking.exit:                   ; preds = %if.end.i1123, %trace_jbd2_start_commit.exit
+  br i1 undef, label %spin_unlock.exit1146, label %if.then.i.i.i.i1144
+
+if.then.i.i.i.i1144:                              ; preds = %trace_jbd2_commit_locking.exit
+  unreachable
+
+spin_unlock.exit1146:                             ; preds = %trace_jbd2_commit_locking.exit
+  br i1 undef, label %spin_unlock.exit1154, label %if.then.i.i.i.i1152
+
+if.then.i.i.i.i1152:                              ; preds = %spin_unlock.exit1146
+  br label %spin_unlock.exit1154
+
+spin_unlock.exit1154:                             ; preds = %if.then.i.i.i.i1152, %spin_unlock.exit1146
+  br i1 undef, label %do.body.i1159, label %trace_jbd2_commit_flushing.exit
+
+do.body.i1159:                                    ; preds = %spin_unlock.exit1154
+  unreachable
+
+trace_jbd2_commit_flushing.exit:                  ; preds = %spin_unlock.exit1154
+  br i1 undef, label %for.end.i, label %for.body.lr.ph.i
+
+for.body.lr.ph.i:                                 ; preds = %trace_jbd2_commit_flushing.exit
+  br i1 undef, label %spin_unlock.exit.i, label %if.then.i.i.i.i.i
+
+if.then.i.i.i.i.i:                                ; preds = %for.body.lr.ph.i
+  unreachable
+
+spin_unlock.exit.i:                               ; preds = %for.body.lr.ph.i
+  unreachable
+
+for.end.i:                                        ; preds = %trace_jbd2_commit_flushing.exit
+  br i1 undef, label %journal_submit_data_buffers.exit, label %if.then.i.i.i.i31.i
+
+if.then.i.i.i.i31.i:                              ; preds = %for.end.i
+  unreachable
+
+journal_submit_data_buffers.exit:                 ; preds = %for.end.i
+  br i1 undef, label %if.end103, label %if.then102
+
+if.then102:                                       ; preds = %journal_submit_data_buffers.exit
+  unreachable
+
+if.end103:                                        ; preds = %journal_submit_data_buffers.exit
+  br i1 undef, label %do.body.i1182, label %trace_jbd2_commit_logging.exit
+
+do.body.i1182:                                    ; preds = %if.end103
+  unreachable
+
+trace_jbd2_commit_logging.exit:                   ; preds = %if.end103
+  br i1 undef, label %for.end.i1287, label %for.body.i1277
+
+for.body.i1277:                                   ; preds = %trace_jbd2_commit_logging.exit
+  unreachable
+
+for.end.i1287:                                    ; preds = %trace_jbd2_commit_logging.exit
+  br i1 undef, label %journal_finish_inode_data_buffers.exit, label %if.then.i.i.i.i84.i
+
+if.then.i.i.i.i84.i:                              ; preds = %for.end.i1287
+  unreachable
+
+journal_finish_inode_data_buffers.exit:           ; preds = %for.end.i1287
+  br i1 undef, label %if.end256, label %if.then249
+
+if.then249:                                       ; preds = %journal_finish_inode_data_buffers.exit
+  unreachable
+
+if.end256:                                        ; preds = %journal_finish_inode_data_buffers.exit
+  br i1 undef, label %do.body277, label %if.then260
+
+if.then260:                                       ; preds = %if.end256
+  br label %do.body277
+
+do.body277:                                       ; preds = %if.then260, %if.end256
+  br label %while.body318
+
+while.body318:                                    ; preds = %wait_on_buffer.exit, %do.body277
+  %tobool.i1295 = icmp eq i64 undef, 0
+  br i1 %tobool.i1295, label %wait_on_buffer.exit, label %if.then.i1296
+
+if.then.i1296:                                    ; preds = %while.body318
+  unreachable
+
+wait_on_buffer.exit:                              ; preds = %while.body318
+  br i1 undef, label %do.body378, label %while.body318
+
+do.body378:                                       ; preds = %wait_on_buffer.exit
+  br i1 undef, label %while.end418, label %while.body392.lr.ph
+
+while.body392.lr.ph:                              ; preds = %do.body378
+  br label %while.body392
+
+while.body392:                                    ; preds = %wait_on_buffer.exit1319, %while.body392.lr.ph
+  %0 = load i8** undef, align 8
+  %add.ptr399 = getelementptr inbounds i8* %0, i64 -72
+  %b_state.i.i1314 = bitcast i8* %add.ptr399 to i64*  ; i64 word 72 bytes before %0; read below and passed to the asm
+  %tobool.i1316 = icmp eq i64 undef, 0
+  br i1 %tobool.i1316, label %wait_on_buffer.exit1319, label %if.then.i1317
+
+if.then.i1317:                                    ; preds = %while.body392
+  unreachable
+
+wait_on_buffer.exit1319:                          ; preds = %while.body392
+  %1 = load volatile i64* %b_state.i.i1314, align 8
+  %conv.i.i1322 = and i64 %1, 1  ; isolate bit 0 of the loaded word
+  %lnot404 = icmp eq i64 %conv.i.i1322, 0  ; true when bit 0 is clear
+  %.err.4 = select i1 %lnot404, i32 -5, i32 undef  ; computed before the cc-clobbering asm below, consumed after it by the phi in while.end418
+  %2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. $0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* %b_state.i.i1314) #1
+  %prev.i.i.i1325 = getelementptr inbounds i8* %0, i64 8  ; result is not used anywhere in this function
+  %3 = load i32** null, align 8
+  store i32* %3, i32** undef, align 8
+  call void @__brelse(i32* undef) #1
+  br i1 undef, label %while.end418, label %while.body392
+
+; CHECK-LABEL: @jbd2_journal_commit_transaction
+; CHECK: andi.
+; CHECK: cror [[REG:[0-9]+]], 1, 1
+; CHECK: stdcx.
+; CHECK: isel {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, [[REG]]
+
+while.end418:                                     ; preds = %wait_on_buffer.exit1319, %do.body378
+  %err.4.lcssa = phi i32 [ undef, %do.body378 ], [ %.err.4, %wait_on_buffer.exit1319 ]
+  br i1 undef, label %if.end421, label %if.then420
+
+if.then420:                                       ; preds = %while.end418
+  call void @jbd2_journal_abort(i32* %journal, i32 signext %err.4.lcssa) #1  ; only use of %journal; keeps the selected value live after the loop
+  br label %if.end421
+
+if.end421:                                        ; preds = %if.then420, %while.end418
+  unreachable
+}
+
+declare void @jbd2_journal_abort(i32*, i32 signext)
+
+declare void @__brelse(i32*)
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind }
+
--- a/Show More
+++ b/Show More