Import llvm 3.7.1 release (r255217).

svn path=/vendor/llvm/dist/; revision=292726 svn path=/vendor/llvm/llvm-release_371-r255217/; revision=292727; tag=vendor/llvm/llvm-release_371-r255217
2015-12-25 14:25:49 +00:00 · 2015-12-25 14:25:49 +00:00 · 2fe5752e3a · 2020-12-20 02:59:44 +00:00
commit 2fe5752e3a
parent 69156b4c20
80 changed files with 3659 additions and 236 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -60,7 +60,7 @@ set(CMAKE_MODULE_PATH

 set(LLVM_VERSION_MAJOR 3)
 set(LLVM_VERSION_MINOR 7)
-set(LLVM_VERSION_PATCH 0)
+set(LLVM_VERSION_PATCH 1)
 set(LLVM_VERSION_SUFFIX "")

 if (NOT PACKAGE_VERSION)
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@ -509,3 +509,10 @@ N: Michael Wong
 E: fraggamuffin@gmail.com 
 D: Clang OpenMP implementation

+N: Alexander Mussman
+E: alexander.musman@intel.com 
+D: Clang OpenMP implementation
+
+N: Kevin O'Brien
+E: caomhin@us.ibm.com 
+D: Clang OpenMP implementation
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@ -32,11 +32,11 @@ dnl===-----------------------------------------------------------------------===
 dnl Initialize autoconf and define the package name, version number and
 dnl address for reporting bugs.

-AC_INIT([LLVM],[3.7.0],[http://llvm.org/bugs/])
+AC_INIT([LLVM],[3.7.1],[http://llvm.org/bugs/])

 LLVM_VERSION_MAJOR=3
 LLVM_VERSION_MINOR=7
-LLVM_VERSION_PATCH=0
+LLVM_VERSION_PATCH=1
 LLVM_VERSION_SUFFIX=

 AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API])
--- a/bindings/go/llvm/ir.go
+++ b/bindings/go/llvm/ir.go
@ -1728,7 +1728,7 @@ func (b Builder) CreatePtrDiff(lhs, rhs Value, name string) (v Value) {
 func (b Builder) CreateLandingPad(t Type, personality Value, nclauses int, name string) (l Value) {
 	cname := C.CString(name)
 	defer C.free(unsafe.Pointer(cname))
-	l.C = C.LLVMBuildLandingPad(b.C, t.C, C.unsigned(nclauses), cname)
+	l.C = C.LLVMBuildLandingPad(b.C, t.C, nil, C.unsigned(nclauses), cname)
 	return l
 }

--- a/bindings/ocaml/llvm/llvm_ocaml.c
+++ b/bindings/ocaml/llvm/llvm_ocaml.c
@ -1745,7 +1745,7 @@ CAMLprim LLVMValueRef llvm_build_invoke_bc(value Args[], int NumArgs) {
 CAMLprim LLVMValueRef llvm_build_landingpad(LLVMTypeRef Ty, LLVMValueRef PersFn,
                                            value NumClauses,  value Name,
                                            value B) {
-    return LLVMBuildLandingPad(Builder_val(B), Ty, Int_val(NumClauses),
+    return LLVMBuildLandingPad(Builder_val(B), Ty, PersFn, Int_val(NumClauses),
                               String_val(Name));
 }

--- a/101
+++ b/101
@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for LLVM 3.7.0.
+# Generated by GNU Autoconf 2.60 for LLVM 3.7.1.
 #
 # Report bugs to <http://llvm.org/bugs/>.
 #
@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='LLVM'
 PACKAGE_TARNAME='llvm'
-PACKAGE_VERSION='3.7.0'
-PACKAGE_STRING='LLVM 3.7.0'
+PACKAGE_VERSION='3.7.1'
+PACKAGE_STRING='LLVM 3.7.1'
 PACKAGE_BUGREPORT='http://llvm.org/bugs/'

 ac_unique_file="lib/IR/Module.cpp"
@ -1333,7 +1333,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures LLVM 3.7.0 to adapt to many kinds of systems.
+\`configure' configures LLVM 3.7.1 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@ -1399,7 +1399,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of LLVM 3.7.0:";;
+     short | recursive ) echo "Configuration of LLVM 3.7.1:";;
   esac
  cat <<\_ACEOF

@ -1583,7 +1583,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-LLVM configure 3.7.0
+LLVM configure 3.7.1
 generated by GNU Autoconf 2.60

 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@ -1599,7 +1599,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by LLVM $as_me 3.7.0, which was
+It was created by LLVM $as_me 3.7.1, which was
 generated by GNU Autoconf 2.60.  Invocation command line was

  $ $0 $@
@ -1955,7 +1955,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu

 LLVM_VERSION_MAJOR=3
 LLVM_VERSION_MINOR=7
-LLVM_VERSION_PATCH=0
+LLVM_VERSION_PATCH=1
 LLVM_VERSION_SUFFIX=


@ -8643,87 +8643,6 @@ fi

 if test "$llvm_cv_os_type" = "MingW" ; then

-{ echo "$as_me:$LINENO: checking for main in -limagehlp" >&5
-echo $ECHO_N "checking for main in -limagehlp... $ECHO_C" >&6; }
-if test "${ac_cv_lib_imagehlp_main+set}" = set; then
-  echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-  ac_check_lib_save_LIBS=$LIBS
-LIBS="-limagehlp  $LIBS"
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h.  */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h.  */
-
-
-int
-main ()
-{
-return main ();
-  ;
-  return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (ac_try="$ac_link"
-case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_link") 2>conftest.er1
-  ac_status=$?
-  grep -v '^ *+' conftest.er1 >conftest.err
-  rm -f conftest.er1
-  cat conftest.err >&5
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); } &&
-	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
-  { (case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_try") 2>&5
-  ac_status=$?
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); }; } &&
-	 { ac_try='test -s conftest$ac_exeext'
-  { (case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_try") 2>&5
-  ac_status=$?
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); }; }; then
-  ac_cv_lib_imagehlp_main=yes
-else
-  echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-	ac_cv_lib_imagehlp_main=no
-fi
-
-rm -f core conftest.err conftest.$ac_objext \
-      conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ echo "$as_me:$LINENO: result: $ac_cv_lib_imagehlp_main" >&5
-echo "${ECHO_T}$ac_cv_lib_imagehlp_main" >&6; }
-if test $ac_cv_lib_imagehlp_main = yes; then
-  cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBIMAGEHLP 1
-_ACEOF
-
-  LIBS="-limagehlp $LIBS"
-
-fi
-
-
 { echo "$as_me:$LINENO: checking for main in -lole32" >&5
 echo $ECHO_N "checking for main in -lole32... $ECHO_C" >&6; }
 if test "${ac_cv_lib_ole32_main+set}" = set; then
@ -18610,7 +18529,7 @@ exec 6>&1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by LLVM $as_me 3.7.0, which was
+This file was extended by LLVM $as_me 3.7.1, which was
 generated by GNU Autoconf 2.60.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@ -18663,7 +18582,7 @@ Report bugs to <bug-autoconf@gnu.org>."
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-LLVM config.status 3.7.0
+LLVM config.status 3.7.1
 configured by $0, generated by GNU Autoconf 2.60,
  with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"

--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@ -25,7 +25,35 @@ LLVM web page, this document applies to the *next* release, not the current
 one.  To see the release notes for a specific release, please see the `releases
 page <http://llvm.org/releases/>`_.

-Non-comprehensive list of changes in this release
+Major changes in 3.7.1
+======================
+
+* 3.7.0 was released with an inadvertent change to the signature of the C
+  API function: LLVMBuildLandingPad, which made the C API incompatible with
+  prior releases.  This has been corrected in LLVM 3.7.1.
+
+  As a result of this change, 3.7.0 is not ABI compatible with 3.7.1.
+
+  +----------------------------------------------------------------------------+
+  | History of the LLVMBuildLandingPad() function                              |
+  +===========================+================================================+
+  | 3.6.2 and prior releases  | LLVMBuildLandingPad(LLVMBuilderRef,            |
+  |                           |                     LLVMTypeRef,               |
+  |                           |                     LLVMValueRef,              |
+  |                           |                     unsigned, const char*)     |
+  +---------------------------+------------------------------------------------+
+  | 3.7.0                     | LLVMBuildLandingPad(LLVMBuilderRef,            |
+  |                           |                     LLVMTypeRef,               |
+  |                           |                     unsigned, const char*)     |
+  +---------------------------+------------------------------------------------+
+  | 3.7.1 and future releases | LLVMBuildLandingPad(LLVMBuilderRef,            |
+  |                           |                     LLVMTypeRef,               |
+  |                           |                     LLVMValueRef,              |
+  |                           |                     unsigned, const char*)     |
+  +---------------------------+------------------------------------------------+
+
+
+Non-comprehensive list of changes in 3.7.0
 =================================================

 .. NOTE
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@ -2675,7 +2675,8 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef, LLVMValueRef Fn,
                             LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
                             const char *Name);
 LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty,
-                                 unsigned NumClauses, const char *Name);
+                                 LLVMValueRef PersFn, unsigned NumClauses,
+                                 const char *Name);
 LLVMValueRef LLVMBuildResume(LLVMBuilderRef B, LLVMValueRef Exn);
 LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef);

--- a/include/llvm/CodeGen/CommandFlags.h
+++ b/include/llvm/CodeGen/CommandFlags.h
@ -21,7 +21,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCTargetOptionsCommandFlags.h"
-#include "llvm//MC/SubtargetFeature.h"
+#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Host.h"
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@ -169,7 +169,7 @@ void WinException::endFunction(const MachineFunction *MF) {
    Asm->OutStreamer->PopSection();
  }

-  if (shouldEmitMoves)
+  if (shouldEmitMoves || shouldEmitPersonality)
    Asm->OutStreamer->EmitWinCFIEndProc();
 }

--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@ -439,7 +439,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
                             ISD::ANY_EXTEND, dl, VT, Result);

      ValResult = Result;
-      ChainResult = Chain;
+      ChainResult = newLoad.getValue(1);
      return;
    }

--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@ -1010,6 +1010,8 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+  assert(EltSize * 8 == EltVT.getSizeInBits() &&
+         "Converting bits to bytes lost precision");

  Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
                      DAG.getConstant(EltSize, dl, Index.getValueType()));
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@ -1528,9 +1528,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
  if (CustomLowerNode(N, N->getValueType(0), true))
    return SDValue();

-  // Store the vector to the stack.
-  EVT EltVT = VecVT.getVectorElementType();
+  // Make the vector elements byte-addressable if they aren't already.
  SDLoc dl(N);
+  EVT EltVT = VecVT.getVectorElementType();
+  if (EltVT.getSizeInBits() < 8) {
+    SmallVector<SDValue, 4> ElementOps;
+    for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) {
+      ElementOps.push_back(DAG.getAnyExtOrTrunc(
+          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec,
+                      DAG.getConstant(i, dl, MVT::i8)),
+          dl, MVT::i8));
+    }
+
+    EltVT = MVT::i8;
+    VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+                             VecVT.getVectorNumElements());
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, ElementOps);
+  }
+
+  // Store the vector to the stack.
  SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
                               MachinePointerInfo(), false, false, 0);
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@ -794,6 +794,10 @@ void SlotTracker::processFunction() {
  ST_DEBUG("begin processFunction!\n");
  fNext = 0;

+  // Process function metadata if it wasn't hit at the module-level.
+  if (!ShouldInitializeAllMetadata)
+    processFunctionMetadata(*TheFunction);
+
  // Add all the function arguments with no names.
  for(Function::const_arg_iterator AI = TheFunction->arg_begin(),
      AE = TheFunction->arg_end(); AI != AE; ++AI)
@ -807,8 +811,6 @@ void SlotTracker::processFunction() {
    if (!BB.hasName())
      CreateFunctionSlot(&BB);

-    processFunctionMetadata(*TheFunction);
-
    for (auto &I : BB) {
      if (!I.getType()->isVoidTy() && !I.hasName())
        CreateFunctionSlot(&I);
@ -836,11 +838,11 @@ void SlotTracker::processFunction() {

 void SlotTracker::processFunctionMetadata(const Function &F) {
  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
-  for (auto &BB : F) {
-    F.getAllMetadata(MDs);
-    for (auto &MD : MDs)
-      CreateMetadataSlot(MD.second);
+  F.getAllMetadata(MDs);
+  for (auto &MD : MDs)
+    CreateMetadataSlot(MD.second);

+  for (auto &BB : F) {
    for (auto &I : BB)
      processInstructionMetadata(I);
  }
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@ -2257,7 +2257,14 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
 }

 LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty,
-                                 unsigned NumClauses, const char *Name) {
+                                 LLVMValueRef PersFn, unsigned NumClauses,
+                                 const char *Name) {
+  // The personality used to live on the landingpad instruction, but now it
+  // lives on the parent function. For compatibility, take the provided
+  // personality and put it on the parent function.
+  if (PersFn)
+    unwrap(B)->GetInsertBlock()->getParent()->setPersonalityFn(
+        cast<Function>(unwrap(PersFn)));
  return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty), NumClauses, Name));
 }

--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@ -63,14 +63,21 @@ const char* LTOCodeGenerator::getVersionString() {
 #endif
 }

+static void handleLTODiagnostic(const DiagnosticInfo &DI) {
+  DiagnosticPrinterRawOStream DP(errs());
+  DI.print(DP);
+  errs() << "\n";
+}
+
 LTOCodeGenerator::LTOCodeGenerator()
-    : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context)) {
+    : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context),
+                                            handleLTODiagnostic) {
  initializeLTOPasses();
 }

 LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr<LLVMContext> Context)
    : OwnedContext(std::move(Context)), Context(*OwnedContext),
-      IRLinker(new Module("ld-temp.o", *OwnedContext)) {
+      IRLinker(new Module("ld-temp.o", *OwnedContext), handleLTODiagnostic) {
  initializeLTOPasses();
 }

--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@ -82,6 +82,7 @@ void MCContext::reset() {

  UsedNames.clear();
  Symbols.clear();
+  SectionSymbols.clear();
  Allocator.Reset();
  Instances.clear();
  CompilationDir.clear();
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@ -264,6 +264,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      // TODO: CodeSize should account for multiple functions.
+
+      // TODO: Should we count size of debug info?
+      if (MI.isDebugValue())
+        continue;
+
+      // FIXME: This is reporting 0 for many instructions.
      CodeSize += MI.getDesc().Size;

      unsigned numOperands = MI.getNumOperands();
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@ -134,13 +134,17 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
 //
 // TODO: Check isTriviallyVectorizable for calls and handle other
 // instructions.
-static bool canVectorizeInst(Instruction *Inst) {
+static bool canVectorizeInst(Instruction *Inst, User *User) {
  switch (Inst->getOpcode()) {
  case Instruction::Load:
-  case Instruction::Store:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
    return true;
+  case Instruction::Store: {
+    // Must be the stored pointer operand, not a stored value.
+    StoreInst *SI = cast<StoreInst>(Inst);
+    return SI->getPointerOperand() == User;
+  }
  default:
    return false;
  }
@ -166,7 +170,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
  for (User *AllocaUser : Alloca->users()) {
    GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
    if (!GEP) {
-      if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
+      if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
        return false;

      WorkList.push_back(AllocaUser);
@ -184,7 +188,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {

    GEPVectorIdx[GEP] = Index;
    for (User *GEPUser : AllocaUser->users()) {
-      if (!canVectorizeInst(cast<Instruction>(GEPUser)))
+      if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
        return false;

      WorkList.push_back(GEPUser);
@ -240,7 +244,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
  for (User *User : Val->users()) {
    if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
      continue;
-    if (isa<CallInst>(User)) {
+    if (CallInst *CI = dyn_cast<CallInst>(User)) {
+      // TODO: We might be able to handle some cases where the callee is a
+      // constantexpr bitcast of a function.
+      if (!CI->getCalledFunction())
+        return false;
+
      WorkList.push_back(User);
      continue;
    }
@ -250,6 +259,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
    if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
      return false;

+    if (StoreInst *SI = dyn_cast_or_null<StoreInst>(UseInst)) {
+      // Reject if the stored value is not the pointer operand.
+      if (SI->getPointerOperand() != Val)
+        return false;
+    }
+
    if (!User->getType()->isPointerTy())
      continue;

--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.td
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.td
@ -14,8 +14,7 @@
 let Namespace = "AMDGPU" in {

 foreach Index = 0-15 in {
-  // Indices are used in a variety of ways here, so don't set a size/offset.
-  def sub#Index : SubRegIndex<-1, -1>;
+  def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
 }

 def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@ -71,12 +71,26 @@ void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm,
  }
 }

+static unsigned getFixupKindNumBytes(unsigned Kind) {
+  switch (Kind) {
+  case FK_Data_1:
+    return 1;
+  case FK_Data_2:
+    return 2;
+  case FK_Data_4:
+    return 4;
+  case FK_Data_8:
+    return 8;
+  default:
+    llvm_unreachable("Unknown fixup kind!");
+  }
+}
+
 void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
                                  unsigned DataSize, uint64_t Value,
                                  bool IsPCRel) const {

  switch ((unsigned)Fixup.getKind()) {
-    default: llvm_unreachable("Unknown fixup kind");
    case AMDGPU::fixup_si_sopp_br: {
      uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
      *Dst = (Value - 4) / 4;
@ -96,6 +110,24 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
      *Dst = Value + 4;
      break;
    }
+    default: {
+      // FIXME: Copied from AArch64
+      unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+      if (!Value)
+        return; // Doesn't change encoding.
+      MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+
+      // Shift the value into position.
+      Value <<= Info.TargetOffset;
+
+      unsigned Offset = Fixup.getOffset();
+      assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+      // For each byte of the fragment that the fixup touches, mask in the
+      // bits from the fixup value.
+      for (unsigned i = 0; i != NumBytes; ++i)
+        Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+    }
  }
 }

--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@ -157,6 +157,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,

  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
+  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);

  setOperationAction(ISD::LOAD, MVT::i1, Custom);
@ -2252,10 +2253,8 @@ MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
                                                  SDValue Ptr) const {
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
-                  0xffffffff; // Size

-  return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
+  return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23());
 }

 SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@ -2778,3 +2778,16 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {

  return RsrcDataFormat;
 }
+
+uint64_t SIInstrInfo::getScratchRsrcWords23() const {
+  uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
+                    AMDGPU::RSRC_TID_ENABLE |
+                    0xffffffff; // Size;
+
+  // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
+  // Clear them unless we want a huge stride.
+  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+    Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
+
+  return Rsrc23;
+}
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@ -353,7 +353,7 @@ class SIInstrInfo : public AMDGPUInstrInfo {
  }

  uint64_t getDefaultRsrcDataFormat() const;
-
+  uint64_t getScratchRsrcWords23() const;
 };

 namespace AMDGPU {
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@ -1548,6 +1548,12 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m <
 // These instructions only exist on SI and CI
 let SubtargetPredicate = isSICI in {

+let isCommutable = 1 in {
+defm V_MAC_LEGACY_F32 : VOP2InstSI <vop2<0x6>, "v_mac_legacy_f32",
+  VOP_F32_F32_F32
+>;
+} // End isCommutable = 1
+
 defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
  VOP_F32_F32_F32, AMDGPUfmin_legacy
 >;
@ -1562,12 +1568,6 @@ defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;
 } // End isCommutable = 1
 } // End let SubtargetPredicate = SICI

-let isCommutable = 1 in {
-defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
-  VOP_F32_F32_F32
->;
-} // End isCommutable = 1
-
 defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
  VOP_I32_I32_I32
 >;
--- a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
+++ b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
@ -135,8 +135,7 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
      unsigned ScratchRsrcReg =
          RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);

-      uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
-                      0xffffffff; // Size
+      uint64_t Rsrc23 = TII->getScratchRsrcWords23();

      unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
@ -152,11 +151,11 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
              .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
-              .addImm(Rsrc & 0xffffffff)
+              .addImm(Rsrc23 & 0xffffffff)
              .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
-              .addImm(Rsrc >> 32)
+              .addImm(Rsrc23 >> 32)
              .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      // Scratch Offset
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@ -26,23 +26,25 @@ using namespace llvm;

 SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}

+void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
+  MCRegAliasIterator R(Reg, this, true);
+
+  for (; R.isValid(); ++R)
+    Reserved.set(*R);
+}
+
 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
-  Reserved.set(AMDGPU::EXEC);
-
-  // EXEC_LO and EXEC_HI could be allocated and used as regular register,
-  // but this seems likely to result in bugs, so I'm marking them as reserved.
-  Reserved.set(AMDGPU::EXEC_LO);
-  Reserved.set(AMDGPU::EXEC_HI);
-
  Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
-  Reserved.set(AMDGPU::FLAT_SCR);
-  Reserved.set(AMDGPU::FLAT_SCR_LO);
-  Reserved.set(AMDGPU::FLAT_SCR_HI);
+
+  // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
+  // this seems likely to result in bugs, so I'm marking them as reserved.
+  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
+  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
-  Reserved.set(AMDGPU::VGPR255);
-  Reserved.set(AMDGPU::VGPR254);
+  reserveRegisterTuples(Reserved, AMDGPU::VGPR254);
+  reserveRegisterTuples(Reserved, AMDGPU::VGPR255);

  // Tonga and Iceland can only allocate a fixed number of SGPRs due
  // to a hw bug.
@ -54,10 +56,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {

    for (unsigned i = Limit; i < NumSGPRs; ++i) {
      unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
-      MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
-
-      for (; R.isValid(); ++R)
-        Reserved.set(*R);
+      reserveRegisterTuples(Reserved, Reg);
    }
  }

--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@ -23,7 +23,10 @@
 namespace llvm {

 struct SIRegisterInfo : public AMDGPURegisterInfo {
+private:
+  void reserveRegisterTuples(BitVector &, unsigned Reg) const;

+public:
  SIRegisterInfo();

  BitVector getReservedRegs(const MachineFunction &MF) const override;
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@ -15,6 +15,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
@ -9104,6 +9105,10 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
    return false;
  }

+  Triple T;
+  STI.setDefaultFeatures(T.getARMCPUForArch(Arch));
+  setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
  getTargetStreamer().emitArch(ID);
  return false;
 }
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@ -50,6 +50,7 @@ class BPFDAGToDAGISel : public SelectionDAGISel {

  // Complex Pattern for address selection.
  bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
+  bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
 };
 }

@ -67,7 +68,7 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
      Addr.getOpcode() == ISD::TargetGlobalAddress)
    return false;

-  // Addresses of the form FI+const or FI|const
+  // Addresses of the form Addr+const or Addr|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<32>(CN->getSExtValue())) {
@ -89,6 +90,31 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
  return true;
 }

+// ComplexPattern used on BPF FI instruction
+bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
+  SDLoc DL(Addr);
+
+  if (!CurDAG->isBaseWithConstantOffset(Addr))
+    return false;
+
+  // Addresses of the form Addr+const or Addr|const
+  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+  if (isInt<32>(CN->getSExtValue())) {
+
+    // If the first operand is a FI, get the TargetFI Node
+    if (FrameIndexSDNode *FIN =
+            dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+      Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+    else
+      return false;
+
+    Offset = CurDAG->getTargetConstant(CN->getSExtValue(), DL, MVT::i64);
+    return true;
+  }
+
+  return false;
+}
+
 SDNode *BPFDAGToDAGISel::Select(SDNode *Node) {
  unsigned Opcode = Node->getOpcode();

@ -104,13 +130,6 @@ SDNode *BPFDAGToDAGISel::Select(SDNode *Node) {
  // tablegen selection should be handled here.
  switch (Opcode) {
  default: break;
-
-  case ISD::UNDEF: {
-    errs() << "BUG: "; Node->dump(CurDAG); errs() << '\n';
-    report_fatal_error("shouldn't see UNDEF during Select");
-    break;
-  }
-
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@ -102,6 +102,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,

  setOperationAction(ISD::BR_CC, MVT::i64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
@ -128,9 +129,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::SUBC, MVT::i64, Expand);
  setOperationAction(ISD::SUBE, MVT::i64, Expand);

-  // no UNDEF allowed
-  setOperationAction(ISD::UNDEF, MVT::i64, Expand);
-
  setOperationAction(ISD::ROTR, MVT::i64, Expand);
  setOperationAction(ISD::ROTL, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
--- a/lib/Target/BPF/BPFInstrInfo.td
+++ b/lib/Target/BPF/BPFInstrInfo.td
@ -54,7 +54,8 @@ def i64immSExt32 : PatLeaf<(imm),
                [{return isInt<32>(N->getSExtValue()); }]>;

 // Addressing modes.
-def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [frameindex], []>;
+def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [], []>;
+def FIri : ComplexPattern<i64, 2, "SelectFIAddr", [add, or], []>;

 // Address operands
 def MEMri : Operand<i64> {
@ -260,6 +261,15 @@ def MOV_rr : MOV_RR<"mov">;
 def MOV_ri : MOV_RI<"mov">;
 }

+def FI_ri
+    : InstBPF<(outs GPR:$dst), (ins MEMri:$addr),
+               "lea\t$dst, $addr",
+               [(set i64:$dst, FIri:$addr)]> {
+  // This is a tentative instruction, and will be replaced
+  // with MOV_rr and ADD_ri in PEI phase
+}
+
+
 def LD_pseudo
    : InstBPF<(outs GPR:$dst), (ins i64imm:$pseudo, u64imm:$imm),
              "ld_pseudo\t$dst, $pseudo, $imm",
--- a/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/lib/Target/BPF/BPFRegisterInfo.cpp
@ -58,14 +58,13 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,

  unsigned FrameReg = getFrameRegister(MF);
  int FrameIndex = MI.getOperand(i).getIndex();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  MachineBasicBlock &MBB = *MI.getParent();

  if (MI.getOpcode() == BPF::MOV_rr) {
-    const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
    int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);

    MI.getOperand(i).ChangeToRegister(FrameReg, false);
-
-    MachineBasicBlock &MBB = *MI.getParent();
    unsigned reg = MI.getOperand(i - 1).getReg();
    BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg)
        .addReg(reg)
@ -79,8 +78,24 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
  if (!isInt<32>(Offset))
    llvm_unreachable("bug in frame offset");

-  MI.getOperand(i).ChangeToRegister(FrameReg, false);
-  MI.getOperand(i + 1).ChangeToImmediate(Offset);
+  if (MI.getOpcode() == BPF::FI_ri) {
+    // architecture does not really support FI_ri, replace it with
+    //    MOV_rr <target_reg>, frame_reg
+    //    ADD_ri <target_reg>, imm
+    unsigned reg = MI.getOperand(i - 1).getReg();
+
+    BuildMI(MBB, ++II, DL, TII.get(BPF::MOV_rr), reg)
+        .addReg(FrameReg);
+    BuildMI(MBB, II, DL, TII.get(BPF::ADD_ri), reg)
+        .addReg(reg)
+        .addImm(Offset);
+
+    // Remove FI_ri instruction
+    MI.eraseFromParent();
+  } else {
+    MI.getOperand(i).ChangeToRegister(FrameReg, false);
+    MI.getOperand(i + 1).ChangeToImmediate(Offset);
+  }
 }

 unsigned BPFRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@ -269,6 +269,14 @@ namespace llvm {
    unsigned getRegisterByName(const char* RegName, EVT VT,
                               SelectionDAG &DAG) const override;

+    /// Returns true if a cast between SrcAS and DestAS is a noop.
+    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+      // Mips doesn't have any special address spaces so we just reserve
+      // the first 256 for software use (e.g. OpenCL) and treat casts
+      // between them as noops.
+      return SrcAS < 256 && DestAS < 256;
+    }
+
  protected:
    SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;

--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@ -115,6 +115,11 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
    if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
      continue;

+    // Also, we have to check that the register class of the operand
+    // contains the zero register.
+    if (!MRI->getRegClass(MO.getReg())->contains(ZeroReg))
+      continue;
+
    MO.setReg(ZeroReg);
  }

--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@ -947,11 +947,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    return;
  }
  case PPC::ADDISdtprelHA:
-    // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
-    // Into:      %Xd = ADDIS8 %X3, sym@dtprel@ha
+    // Transform: %Xd = ADDISdtprelHA %Xs, <ga:@sym>
+    // Into:      %Xd = ADDIS8 %Xs, sym@dtprel@ha
  case PPC::ADDISdtprelHA32: {
-    // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym>
-    // Into:      %Rd = ADDIS %R3, sym@dtprel@ha
+    // Transform: %Rd = ADDISdtprelHA32 %Rs, <ga:@sym>
+    // Into:      %Rd = ADDIS %Rs, sym@dtprel@ha
    const MachineOperand &MO = MI->getOperand(2);
    const GlobalValue *GValue = MO.getGlobal();
    MCSymbol *MOSymbol = getSymbol(GValue);
@ -962,7 +962,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
        *OutStreamer,
        MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
            .addReg(MI->getOperand(0).getReg())
-            .addReg(Subtarget->isPPC64() ? PPC::X3 : PPC::R3)
+            .addReg(MI->getOperand(1).getReg())
            .addExpr(SymDtprel));
    return;
  }
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@ -197,10 +197,18 @@ static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
 // Determining the address of a TLS variable results in a function call in
 // certain TLS models.
 static bool memAddrUsesCTR(const PPCTargetMachine *TM,
-                           const llvm::Value *MemAddr) {
+                           const Value *MemAddr) {
  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
-  if (!GV)
+  if (!GV) {
+    // Recurse to check for constants that refer to TLS global variables.
+    if (const auto *CV = dyn_cast<Constant>(MemAddr))
+      for (const auto &CO : CV->operands())
+        if (memAddrUsesCTR(TM, CO))
+          return true;
+
    return false;
+  }
+
  if (!GV->isThreadLocal())
    return false;
  if (!TM)
@ -239,6 +247,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          default: continue;
+          // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
+          // we're definitely using CTR.
+          case Intrinsic::ppc_is_decremented_ctr_nonzero:
+	  case Intrinsic::ppc_mtctr:
+	    return true;

 // VisualStudio defines setjmp as _setjmp
 #if defined(_MSC_VER) && defined(setjmp) && \
@ -426,6 +439,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    MadeChange |= convertToCTRLoop(*I);
+    DEBUG(dbgs() << "Nested loop converted\n");
  }

  // If a nested loop has been converted, then we can't convert this loop.
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@ -2570,13 +2570,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
      return nullptr;
    }
    // ISD::OR doesn't get all the bitfield insertion fun.
-    // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
+    // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
+    // bitfield insert.
    if (isInt32Immediate(N->getOperand(1), Imm) &&
        N->getOperand(0).getOpcode() == ISD::OR &&
        isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
+      // The idea here is to check whether this is equivalent to:
+      //   (c1 & m) | (x & ~m)
+      // where m is a run-of-ones mask. The logic here is that, for each bit in
+      // c1 and c2:
+      //  - if both are 1, then the output will be 1.
+      //  - if both are 0, then the output will be 0.
+      //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
+      //    come from x.
+      //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
+      //    be 0.
+      //  If that last condition is never the case, then we can form m from the
+      //  bits that are the same between c1 and c2.
      unsigned MB, ME;
-      Imm = ~(Imm^Imm2);
-      if (isRunOfOnes(Imm, MB, ME)) {
+      if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                            N->getOperand(0).getOperand(1),
                            getI32Imm(0, dl), getI32Imm(MB, dl),
@ -2787,6 +2799,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
        SDValue Base, Offset;

        if (LD->isUnindexed() &&
+            (LD->getMemoryVT() == MVT::f64 ||
+             LD->getMemoryVT() == MVT::i64) &&
            SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
          SDValue Chain = LD->getChain();
          SDValue Ops[] = { Base, Offset, Chain };
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@ -431,6 +431,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+      setOperationAction(ISD::SELECT_CC, VT, Promote);
+      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

@ -7175,7 +7177,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
-        PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
@ -7183,8 +7184,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
-        PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG)   ||
-        PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) {
+        (Subtarget.hasP8Altivec() && (
+         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
+         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
+         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
      return Op;
    }
  }
@ -7195,7 +7198,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
-      PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
@ -7203,8 +7205,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
-      PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG)             ||
-      PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))
+      (Subtarget.hasP8Altivec() && (
+       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
+       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@ -309,6 +309,11 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
  unsigned MB = MI->getOperand(4).getImm();
  unsigned ME = MI->getOperand(5).getImm();

+  // We can't commute a trivial mask (there is no way to represent an all-zero
+  // mask).
+  if (MB == 0 && ME == 31)
+    return nullptr;
+
  if (NewMI) {
    // Create a new instruction.
    unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@ -2835,24 +2835,84 @@ def : Pat<(i64 (anyext i1:$in)),
          (SELECT_I8 $in, (LI8 1), (LI8 0))>;

 // match setcc on i1 variables.
+// CRANDC is:
+//   1 1 : F
+//   1 0 : T
+//   0 1 : F
+//   0 0 : F
+//
+// LT is:
+//  -1 -1  : F
+//  -1  0  : T
+//   0 -1  : F
+//   0  0  : F
+//
+// ULT is:
+//   1 1 : F
+//   1 0 : F
+//   0 1 : T
+//   0 0 : F
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)),
-          (CRANDC $s2, $s1)>;
+          (CRANDC $s1, $s2)>;
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)),
          (CRANDC $s2, $s1)>;
+// CRORC is:
+//   1 1 : T
+//   1 0 : T
+//   0 1 : F
+//   0 0 : T
+//
+// LE is:
+//  -1 -1 : T
+//  -1  0 : T
+//   0 -1 : F
+//   0  0 : T
+//
+// ULE is:
+//   1 1 : T
+//   1 0 : F
+//   0 1 : T
+//   0 0 : T
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)),
-          (CRORC $s2, $s1)>;
+          (CRORC $s1, $s2)>;
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)),
          (CRORC $s2, $s1)>;
+
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)),
          (CREQV $s1, $s2)>;
+
+// GE is:
+//  -1 -1 : T
+//  -1  0 : F
+//   0 -1 : T
+//   0  0 : T
+//
+// UGE is:
+//   1 1 : T
+//   1 0 : T
+//   0 1 : F
+//   0 0 : T
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)),
-          (CRORC $s1, $s2)>;
+          (CRORC $s2, $s1)>;
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)),
          (CRORC $s1, $s2)>;
+
+// GT is:
+//  -1 -1 : F
+//  -1  0 : F
+//   0 -1 : T
+//   0  0 : F
+//
+// UGT is:
+//  1 1 : F
+//  1 0 : T
+//  0 1 : F
+//  0 0 : F
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)),
-          (CRANDC $s1, $s2)>;
+          (CRANDC $s2, $s1)>;
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)),
          (CRANDC $s1, $s2)>;
+
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)),
          (CRXOR $s1, $s2)>;

@ -3203,18 +3263,30 @@ def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)),
 //   select (lhs == rhs), tval, fval is:
 //   ((lhs == rhs) & tval) | (!(lhs == rhs) & fval)
 def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)),
+           (CROR (CRAND (CRANDC $lhs, $rhs), $tval),
+                 (CRAND (CRORC  $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULT)),
           (CROR (CRAND (CRANDC $rhs, $lhs), $tval),
                 (CRAND (CRORC  $lhs, $rhs), $fval))>;
 def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)),
+           (CROR (CRAND (CRORC  $lhs, $rhs), $tval),
+                 (CRAND (CRANDC $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULE)),
           (CROR (CRAND (CRORC  $rhs, $lhs), $tval),
                 (CRAND (CRANDC $lhs, $rhs), $fval))>;
 def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)),
           (CROR (CRAND (CREQV $lhs, $rhs), $tval),
                 (CRAND (CRXOR $lhs, $rhs), $fval))>;
 def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)),
+           (CROR (CRAND (CRORC  $rhs, $lhs), $tval),
+                 (CRAND (CRANDC $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGE)),
           (CROR (CRAND (CRORC  $lhs, $rhs), $tval),
                 (CRAND (CRANDC $rhs, $lhs), $fval))>;
 def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)),
+           (CROR (CRAND (CRANDC $rhs, $lhs), $tval),
+                 (CRAND (CRORC  $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGT)),
           (CROR (CRAND (CRANDC $lhs, $rhs), $tval),
                 (CRAND (CRORC  $rhs, $lhs), $fval))>;
 def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)),
@ -3223,66 +3295,106 @@ def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)),

 // match selectcc on i1 variables with non-i1 output.
 def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)),
+          (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULT)),
          (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)),
+          (SELECT_I4 (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULE)),
          (SELECT_I4 (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)),
          (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)),
+          (SELECT_I4 (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGE)),
          (SELECT_I4 (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)),
+          (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGT)),
          (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)),
          (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>;

 def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)),
+          (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULT)),
          (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)),
+          (SELECT_I8 (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULE)),
          (SELECT_I8 (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)),
          (SELECT_I8 (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)),
+          (SELECT_I8 (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGE)),
          (SELECT_I8 (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)),
+          (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGT)),
          (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)),
          (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>;

 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+          (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
          (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+          (SELECT_F4 (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
          (SELECT_F4 (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
          (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+          (SELECT_F4 (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
          (SELECT_F4 (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+          (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
          (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
          (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>;

 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+          (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
          (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+          (SELECT_F8 (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
          (SELECT_F8 (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
          (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+          (SELECT_F8 (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
          (SELECT_F8 (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+          (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
          (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
          (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>;

 def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)),
+          (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULT)),
          (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)),
+          (SELECT_VRRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULE)),
          (SELECT_VRRC (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETEQ)),
          (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)),
+          (SELECT_VRRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGE)),
          (SELECT_VRRC (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)),
+          (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)),
          (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)),
          (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>;
--- a/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/lib/Target/PowerPC/PPCInstrQPX.td
@ -1115,40 +1115,64 @@ def : Pat<(v4f64 (PPCqbflt v4i1:$src)),
          (COPY_TO_REGCLASS $src, QFRC)>;

 def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)),
+          (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULT)),
          (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)),
+          (SELECT_QFRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULE)),
          (SELECT_QFRC (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)),
          (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)),
+          (SELECT_QFRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGE)),
          (SELECT_QFRC (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)),
+          (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGT)),
          (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)),
          (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>;

 def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)),
+          (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULT)),
          (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)),
+          (SELECT_QSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULE)),
          (SELECT_QSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)),
          (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)),
+          (SELECT_QSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGE)),
          (SELECT_QSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)),
+          (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGT)),
          (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)),
          (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>;

 def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)),
+          (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULT)),
          (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)),
+          (SELECT_QBRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULE)),
          (SELECT_QBRC (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)),
          (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)),
+          (SELECT_QBRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGE)),
          (SELECT_QBRC (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)),
+          (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGT)),
          (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)),
          (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>;
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@ -958,27 +958,43 @@ def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;

 // Selects.
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
+          (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)),
          (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
+          (SELECT_VSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)),
          (SELECT_VSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
          (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
+          (SELECT_VSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)),
          (SELECT_VSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
+          (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)),
          (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
          (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;

 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+          (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
          (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+          (SELECT_VSFRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
          (SELECT_VSFRC (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
          (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+          (SELECT_VSFRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
          (SELECT_VSFRC (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+          (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
          (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
          (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
@ -1060,18 +1076,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
            (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
  def : Pat<(f64 (fextend f32:$src)),
            (COPY_TO_REGCLASS $src, VSFRC)>;
+
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+            (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
            (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+            (SELECT_VSSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
            (SELECT_VSSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
            (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+            (SELECT_VSSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
            (SELECT_VSSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+            (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
            (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
-          (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+            (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;

  // VSX Elementary Scalar FP arithmetic (SP)
  let isCommutable = 1 in {
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@ -103,6 +103,11 @@ namespace {

        VNInfo *AddendValNo =
          LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
+        if (!AddendValNo) {
+          // This can be null if the register is undef.
+          continue;
+        }
+
        MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);

        // The addend and this instruction must be in the same block.
@ -181,11 +186,14 @@ namespace {
        if (!KilledProdOp)
          continue;

-        // For virtual registers, verify that the addend source register
-        // is live here (as should have been assured above).
-        assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) ||
-                LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) &&
-               "Addend source register is not live!");
+	// If the addend copy is used only by this MI, then the addend source
+	// register is likely not live here. This could be fixed (based on the
+	// legality checks above, the live range for the addend source register
+	// could be extended), but it seems likely that such a trivial copy can
+	// be coalesced away later, and thus is not worth the effort.
+	if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
+            !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
+          continue;

        // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.

--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@ -240,6 +240,9 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {

+      if (MI.isDebugValue())
+        continue;
+
      bool RelevantInstr = false;
      bool Partial = false;

--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@ -77,7 +77,7 @@ class SparcAsmParser : public MCTargetAsmParser {
  bool parseDirectiveWord(unsigned Size, SMLoc L);

  bool is64Bit() const {
-    return STI.getTargetTriple().getArchName().startswith("sparcv9");
+    return STI.getTargetTriple().getArch() == Triple::sparcv9;
  }

  void expandSET(MCInst &Inst, SMLoc IDLoc,
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -13573,6 +13573,35 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
                       DAG.getConstant(SSECC, dl, MVT::i8));
  }

+  MVT VTOp0 = Op0.getSimpleValueType();
+  assert(VTOp0 == Op1.getSimpleValueType() &&
+         "Expected operands with same type!");
+  assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&
+         "Invalid number of packed elements for source and destination!");
+
+  if (VT.is128BitVector() && VTOp0.is256BitVector()) {
+    // On non-AVX512 targets, a vector of MVT::i1 is promoted by the type
+    // legalizer to a wider vector type.  In the case of 'vsetcc' nodes, the
+    // legalizer firstly checks if the first operand in input to the setcc has
+    // a legal type. If so, then it promotes the return type to that same type.
+    // Otherwise, the return type is promoted to the 'next legal type' which,
+    // for a vector of MVT::i1 is always a 128-bit integer vector type.
+    //
+    // We reach this code only if the following two conditions are met:
+    // 1. Both return type and operand type have been promoted to wider types
+    //    by the type legalizer.
+    // 2. The original operand type has been promoted to a 256-bit vector.
+    //
+    // Note that condition 2. only applies for AVX targets.
+    SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, SetCCOpcode);
+    return DAG.getZExtOrTrunc(NewOp, dl, VT);
+  }
+
+  // The non-AVX512 code below works under the assumption that source and
+  // destination types are the same.
+  assert((Subtarget->hasAVX512() || (VT == VTOp0)) &&
+         "Value types for source and destination must be the same!");
+
  // Break 256-bit integer vector compare into smaller ones.
  if (VT.is256BitVector() && !Subtarget->hasInt256())
    return Lower256IntVSETCC(Op, DAG);
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@ -228,7 +228,7 @@ void PassManagerBuilder::populateModulePassManager(
  // Start of function pass.
  // Break up aggregate allocas, using SSAUpdater.
  if (UseNewSROA)
-    MPM.add(createSROAPass(/*RequiresDomTree*/ false));
+    MPM.add(createSROAPass());
  else
    MPM.add(createScalarReplAggregatesPass(-1, false));
  MPM.add(createEarlyCSEPass());              // Catch trivial redundancies
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@ -1761,7 +1761,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
    if (isa<PHINode>(V))
      V->takeName(LI);
    if (Instruction *I = dyn_cast<Instruction>(V))
-      I->setDebugLoc(LI->getDebugLoc());
+      if (LI->getDebugLoc())
+        I->setDebugLoc(LI->getDebugLoc());
    if (V->getType()->getScalarType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    markInstructionForDeletion(LI);
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@ -869,6 +869,11 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
      PN->replaceAllUsesWith(*Inserted.first);
      PN->eraseFromParent();
      Changed = true;
+
+      // The RAUW can change PHIs that we already visited. Start over from the
+      // beginning.
+      PHISet.clear();
+      I = BB->begin();
    }
  }

--- a/test/CodeGen/AMDGPU/llvm.dbg.value.ll
+++ b/test/CodeGen/AMDGPU/llvm.dbg.value.ll
@ -0,0 +1,37 @@
+; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}test_debug_value:
+; CHECK: s_load_dwordx2
+; CHECK: DEBUG_VALUE: test_debug_value:globalptr_arg <- SGPR0_SGPR1
+; CHECK: buffer_store_dword
+; CHECK: s_endpgm
+define void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %globalptr_arg, i64 0, metadata !10, metadata !13), !dbg !14
+  store i32 123, i32 addrspace(1)* %globalptr_arg, align 4
+  ret void
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 244715) (llvm/trunk 244718)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "/tmp/test_debug_value.cl", directory: "/Users/matt/src/llvm/build_debug")
+!2 = !{}
+!3 = !{!4}
+!4 = !DISubprogram(name: "test_debug_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, function: void (i32 addrspace(1)*)* @test_debug_value, variables: !9)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7}
+!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64, align: 32)
+!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{!10}
+!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "globalptr_arg", arg: 1, scope: !4, file: !1, line: 1, type: !7)
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 2, !"Debug Info Version", i32 3}
+!13 = !DIExpression()
+!14 = !DILocation(line: 1, column: 42, scope: !4)
--- a/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll
+++ b/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll
@ -0,0 +1,22 @@
+; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck %s
+
+; Make sure that AMDGPUPromoteAlloca doesn't crash if the called
+; function is a constantexpr cast of a function.
+
+declare void @foo(float*) #0
+declare void @foo.varargs(...) #0
+
+; CHECK: error: unsupported call to function foo in crash_call_constexpr_cast
+define void @crash_call_constexpr_cast() #0 {
+  %alloca = alloca i32
+  call void bitcast (void (float*)* @foo to void (i32*)*)(i32* %alloca) #0
+  ret void
+}
+
+define void @crash_call_constexpr_cast_varargs() #0 {
+  %alloca = alloca i32
+  call void bitcast (void (...)* @foo.varargs to void (i32*)*)(i32* %alloca) #0
+  ret void
+}
+
+attributes #0 = { nounwind }
--- a/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll
+++ b/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll
@ -0,0 +1,52 @@
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s
+
+; Pointer value is stored in a candidate for LDS usage.
+
+; GCN-LABEL: {{^}}stored_lds_pointer_value:
+; GCN: buffer_store_dword v
+define void @stored_lds_pointer_value(float* addrspace(1)* %ptr) #0 {
+  %tmp = alloca float
+  store float 0.0, float *%tmp
+  store float* %tmp, float* addrspace(1)* %ptr
+  ret void
+}
+
+; GCN-LABEL: {{^}}stored_lds_pointer_value_gep:
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+define void @stored_lds_pointer_value_gep(float* addrspace(1)* %ptr, i32 %idx) #0 {
+bb:
+  %tmp = alloca float, i32 16
+  store float 0.0, float* %tmp
+  %tmp2 = getelementptr inbounds float, float* %tmp, i32 %idx
+  store float* %tmp2, float* addrspace(1)* %ptr
+  ret void
+}
+
+; Pointer value is stored in a candidate for vector usage
+; GCN-LABEL: {{^}}stored_vector_pointer_value:
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+define void @stored_vector_pointer_value(i32* addrspace(1)* %out, i32 %index) {
+entry:
+  %tmp0 = alloca [4 x i32]
+  %x = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 0
+  %y = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 1
+  %z = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 2
+  %w = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 3
+  store i32 0, i32* %x
+  store i32 1, i32* %y
+  store i32 2, i32* %z
+  store i32 3, i32* %w
+  %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 %index
+  store i32* %tmp1, i32* addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
--- a/test/CodeGen/AMDGPU/trunc-store.ll
+++ b/test/CodeGen/AMDGPU/trunc-store.ll
@ -0,0 +1,48 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}truncstore_arg_v16i32_to_v16i8:
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+define void @truncstore_arg_v16i32_to_v16i8(<16 x i8> addrspace(1)* %out, <16 x i32> %in) {
+  %trunc = trunc <16 x i32> %in to <16 x i8>
+  store <16 x i8> %trunc, <16 x i8> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}truncstore_arg_v16i64_to_v16i8:
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+define void @truncstore_arg_v16i64_to_v16i8(<16 x i8> addrspace(1)* %out, <16 x i64> %in) {
+  %trunc = trunc <16 x i64> %in to <16 x i8>
+  store <16 x i8> %trunc, <16 x i8> addrspace(1)* %out
+  ret void
+}
--- a/test/CodeGen/BPF/fi_ri.ll
+++ b/test/CodeGen/BPF/fi_ri.ll
@ -0,0 +1,25 @@
+; RUN: llc < %s -march=bpf | FileCheck %s
+
+%struct.key_t = type { i32, [16 x i8] }
+
+; Function Attrs: nounwind uwtable
+define i32 @test() #0 {
+  %key = alloca %struct.key_t, align 4
+  %1 = bitcast %struct.key_t* %key to i8*
+; CHECK: mov	r1, 0
+; CHECK: stw	-8(r10), r1
+; CHECK: std	-16(r10), r1
+; CHECK: std	-24(r10), r1
+  call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 20, i32 4, i1 false)
+; CHECK: mov	r1, r10
+; CHECK: addi	r1, -20
+  %2 = getelementptr inbounds %struct.key_t, %struct.key_t* %key, i64 0, i32 1, i64 0
+; CHECK: call	test1
+  call void @test1(i8* %2) #3
+  ret i32 0
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
+
+declare void @test1(i8*) #2
--- a/test/CodeGen/BPF/sockex2.ll
+++ b/test/CodeGen/BPF/sockex2.ll
@ -311,7 +311,7 @@ flow_dissector.exit.thread:                       ; preds = %86, %12, %196, %199
 ; CHECK-LABEL: bpf_prog2:
 ; CHECK: ldabs_h r0, r6.data + 12 # encoding: [0x28,0x00,0x00,0x00,0x0c,0x00,0x00,0x00]
 ; CHECK: ldabs_h r0, r6.data + 16 # encoding: [0x28,0x00,0x00,0x00,0x10,0x00,0x00,0x00]
-; CHECK-NOT: implicit
+; CHECK: implicit-def: R
 ; CHECK: ld_64   r1
 ; CHECK-NOT: ori
 ; CHECK: call 1 # encoding: [0x85,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
--- a/test/CodeGen/BPF/undef.ll
+++ b/test/CodeGen/BPF/undef.ll
@ -0,0 +1,68 @@
+; RUN: llc < %s -march=bpf | FileCheck %s
+
+%struct.bpf_map_def = type { i32, i32, i32, i32 }
+%struct.__sk_buff = type opaque
+%struct.routing_key_2 = type { [6 x i8] }
+
+@routing = global %struct.bpf_map_def { i32 1, i32 6, i32 12, i32 1024 }, section "maps", align 4
+@routing_miss_0 = global %struct.bpf_map_def { i32 1, i32 1, i32 12, i32 1 }, section "maps", align 4
+@test1 = global %struct.bpf_map_def { i32 2, i32 4, i32 8, i32 1024 }, section "maps", align 4
+@test1_miss_4 = global %struct.bpf_map_def { i32 2, i32 1, i32 8, i32 1 }, section "maps", align 4
+@_license = global [4 x i8] c"GPL\00", section "license", align 1
+@llvm.used = appending global [6 x i8*] [i8* getelementptr inbounds ([4 x i8], [4 x i8]* @_license, i32 0, i32 0), i8* bitcast (i32 (%struct.__sk_buff*)* @ebpf_filter to i8*), i8* bitcast (%struct.bpf_map_def* @routing to i8*), i8* bitcast (%struct.bpf_map_def* @routing_miss_0 to i8*), i8* bitcast (%struct.bpf_map_def* @test1 to i8*), i8* bitcast (%struct.bpf_map_def* @test1_miss_4 to i8*)], section "llvm.metadata"
+
+; Function Attrs: nounwind uwtable
+define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" {
+  %key = alloca %struct.routing_key_2, align 1
+  %1 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 0
+; CHECK: mov	r1, 5
+; CHECK: stb	-8(r10), r1
+  store i8 5, i8* %1, align 1
+  %2 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 1
+; CHECK: mov	r1, 6
+; CHECK: stb	-7(r10), r1
+  store i8 6, i8* %2, align 1
+  %3 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 2
+; CHECK: mov	r1, 7
+; CHECK: stb	-6(r10), r1
+  store i8 7, i8* %3, align 1
+  %4 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 3
+; CHECK: mov	r1, 8
+; CHECK: stb	-5(r10), r1
+  store i8 8, i8* %4, align 1
+  %5 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 4
+; CHECK: mov	r1, 9
+; CHECK: stb	-4(r10), r1
+  store i8 9, i8* %5, align 1
+  %6 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 5
+; CHECK: mov	r1, 10
+; CHECK: stb	-3(r10), r1
+  store i8 10, i8* %6, align 1
+  %7 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 1, i32 0, i64 0
+; CHECK: mov	r1, r10
+; CHECK: addi	r1, -2
+; CHECK: mov	r2, 0
+; CHECK: sth	6(r1), r2
+; CHECK: sth	4(r1), r2
+; CHECK: sth	2(r1), r2
+; CHECK: sth	24(r10), r2
+; CHECK: sth	22(r10), r2
+; CHECK: sth	20(r10), r2
+; CHECK: sth	18(r10), r2
+; CHECK: sth	16(r10), r2
+; CHECK: sth	14(r10), r2
+; CHECK: sth	12(r10), r2
+; CHECK: sth	10(r10), r2
+; CHECK: sth	8(r10), r2
+; CHECK: sth	6(r10), r2
+; CHECK: sth	-2(r10), r2
+; CHECK: sth	26(r10), r2
+  call void @llvm.memset.p0i8.i64(i8* %7, i8 0, i64 30, i32 1, i1 false)
+  %8 = call i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...) bitcast (i32 (...)* @bpf_map_lookup_elem to i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...)*)(%struct.bpf_map_def* nonnull @routing, %struct.routing_key_2* nonnull %key) #3
+  ret i32 undef
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
+
+declare i32 @bpf_map_lookup_elem(...) #2
--- a/test/CodeGen/Mips/llvm-ir/addrspacecast.ll
+++ b/test/CodeGen/Mips/llvm-ir/addrspacecast.ll
@ -0,0 +1,12 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s -check-prefix=ALL
+
+; Address spaces 1-255 are software defined.
+define i32* @cast(i32 *%arg) {
+  %1 = addrspacecast i32* %arg to i32 addrspace(1)*
+  %2 = addrspacecast i32 addrspace(1)* %1 to i32 addrspace(2)*
+  %3 = addrspacecast i32 addrspace(2)* %2 to i32 addrspace(0)*
+  ret i32* %3
+}
+
+; ALL-LABEL: cast:
+; ALL:           move   $2, $4
--- a/test/CodeGen/Mips/llvm-ir/extractelement.ll
+++ b/test/CodeGen/Mips/llvm-ir/extractelement.ll
@ -0,0 +1,19 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s -check-prefix=ALL
+
+; This test triggered a bug in the vector splitting where the type legalizer
+; attempted to extract the element with by storing the vector, then reading
+; an element back. However, the address calculation was:
+;   Base + Index * (EltSizeInBits / 8)
+; and EltSizeInBits was 1. This caused the index to be forgotten.
+define i1 @via_stack_bug(i8 signext %idx) {
+  %1 = extractelement <2 x i1> <i1 false, i1 true>, i8 %idx
+  ret i1 %1
+}
+
+; ALL-LABEL: via_stack_bug:
+; ALL-DAG:       addiu  [[ONE:\$[0-9]+]], $zero, 1
+; ALL-DAG:       sb     [[ONE]], 7($sp)
+; ALL-DAG:       sb     $zero, 6($sp)
+; ALL-DAG:       addiu  [[VPTR:\$[0-9]+]], $sp, 6
+; ALL-DAG:       addu   [[EPTR:\$[0-9]+]], $4, [[VPTR]]
+; ALL:           lbu    $2, 0([[EPTR]])
--- a/test/CodeGen/Mips/micromips-zero-mat-uses.ll
+++ b/test/CodeGen/Mips/micromips-zero-mat-uses.ll
@ -0,0 +1,8 @@
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=+micromips,+nooddspreg -O0 < %s | FileCheck %s
+
+; CHECK: addiu    $[[R0:[0-9]+]], $zero, 0
+; CHECK: subu16   $2, $[[R0]], ${{[0-9]+}}
+define i32 @foo() {
+  %1 = sub i32 0, undef
+  ret i32 %1
+}
--- a/test/CodeGen/PowerPC/ctr-loop-tls-const.ll
+++ b/test/CodeGen/PowerPC/ctr-loop-tls-const.ll
@ -0,0 +1,40 @@
+; RUN: llc -mcpu=pwr7 -relocation-model=pic < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@x = thread_local global [1600 x i32] zeroinitializer, align 4
+
+; Function Attrs: nounwind
+define void @foo(i32 signext %v) #0 {
+entry:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %induction5 = or i64 %index, 1
+  %0 = getelementptr inbounds [1600 x i32], [1600 x i32]* @x, i64 0, i64 %index
+  %1 = getelementptr inbounds [1600 x i32], [1600 x i32]* @x, i64 0, i64 %induction5
+  %2 = load i32, i32* %0, align 4
+  %3 = load i32, i32* %1, align 4
+  %4 = add nsw i32 %2, %v
+  %5 = add nsw i32 %3, %v
+  store i32 %4, i32* %0, align 4
+  store i32 %5, i32* %1, align 4
+  %index.next = add i64 %index, 2
+  %6 = icmp eq i64 %index.next, 1600
+  br i1 %6, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+; CHECK-LABEL: @foo
+; CHECK-NOT: mtctr
+; CHECK: __tls_get_addr
+
+attributes #0 = { nounwind }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+
--- a/test/CodeGen/PowerPC/ctrloop-intrin.ll
+++ b/test/CodeGen/PowerPC/ctrloop-intrin.ll
@ -0,0 +1,349 @@
+; RUN: llc < %s
+; ModuleID = 'new.bc'
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le--linux-gnu"
+
+@.str.87 = external hidden unnamed_addr constant [5 x i8], align 1
+@.str.1.88 = external hidden unnamed_addr constant [4 x i8], align 1
+@.str.2.89 = external hidden unnamed_addr constant [5 x i8], align 1
+@.str.3.90 = external hidden unnamed_addr constant [4 x i8], align 1
+@.str.4.91 = external hidden unnamed_addr constant [14 x i8], align 1
+@.str.5.92 = external hidden unnamed_addr constant [13 x i8], align 1
+@.str.6.93 = external hidden unnamed_addr constant [10 x i8], align 1
+@.str.7.94 = external hidden unnamed_addr constant [9 x i8], align 1
+@.str.8.95 = external hidden unnamed_addr constant [2 x i8], align 1
+@.str.9.96 = external hidden unnamed_addr constant [2 x i8], align 1
+@.str.10.97 = external hidden unnamed_addr constant [3 x i8], align 1
+@.str.11.98 = external hidden unnamed_addr constant [3 x i8], align 1
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #0
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #0
+
+; Function Attrs: nounwind
+declare i8* @halide_string_to_string(i8*, i8*, i8*) #1
+
+; Function Attrs: nounwind
+declare i8* @halide_int64_to_string(i8*, i8*, i64, i32) #1
+
+; Function Attrs: nounwind
+define weak i8* @halide_double_to_string(i8* %dst, i8* %end, double %arg, i32 %scientific) #1 {
+entry:
+  %arg.addr = alloca double, align 8
+  %bits = alloca i64, align 8
+  %buf = alloca [512 x i8], align 1
+  store double %arg, double* %arg.addr, align 8, !tbaa !4
+  %0 = bitcast i64* %bits to i8*
+  call void @llvm.lifetime.start(i64 8, i8* %0) #0
+  store i64 0, i64* %bits, align 8, !tbaa !8
+  %1 = bitcast double* %arg.addr to i8*
+  %call = call i8* @memcpy(i8* %0, i8* %1, i64 8) #2
+  %2 = load i64, i64* %bits, align 8, !tbaa !8
+  %and = and i64 %2, 4503599627370495
+  %shr = lshr i64 %2, 52
+  %shr.tr = trunc i64 %shr to i32
+  %conv = and i32 %shr.tr, 2047
+  %shr2 = lshr i64 %2, 63
+  %conv3 = trunc i64 %shr2 to i32
+  %cmp = icmp eq i32 %conv, 2047
+  br i1 %cmp, label %if.then, label %if.else.15
+
+if.then:                                          ; preds = %entry
+  %tobool = icmp eq i64 %and, 0
+  %tobool5 = icmp ne i32 %conv3, 0
+  br i1 %tobool, label %if.else.9, label %if.then.4
+
+if.then.4:                                        ; preds = %if.then
+  br i1 %tobool5, label %if.then.6, label %if.else
+
+if.then.6:                                        ; preds = %if.then.4
+  %call7 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.87, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else:                                          ; preds = %if.then.4
+  %call8 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1.88, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else.9:                                        ; preds = %if.then
+  br i1 %tobool5, label %if.then.11, label %if.else.13
+
+if.then.11:                                       ; preds = %if.else.9
+  %call12 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.2.89, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else.13:                                       ; preds = %if.else.9
+  %call14 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.3.90, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else.15:                                       ; preds = %entry
+  %cmp16 = icmp eq i32 %conv, 0
+  %cmp17 = icmp eq i64 %and, 0
+  %or.cond = and i1 %cmp17, %cmp16
+  br i1 %or.cond, label %if.then.18, label %if.end.32
+
+if.then.18:                                       ; preds = %if.else.15
+  %tobool19 = icmp eq i32 %scientific, 0
+  %tobool21 = icmp ne i32 %conv3, 0
+  br i1 %tobool19, label %if.else.26, label %if.then.20
+
+if.then.20:                                       ; preds = %if.then.18
+  br i1 %tobool21, label %if.then.22, label %if.else.24
+
+if.then.22:                                       ; preds = %if.then.20
+  %call23 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4.91, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else.24:                                       ; preds = %if.then.20
+  %call25 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.5.92, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else.26:                                       ; preds = %if.then.18
+  br i1 %tobool21, label %if.then.28, label %if.else.30
+
+if.then.28:                                       ; preds = %if.else.26
+  %call29 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.6.93, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else.30:                                       ; preds = %if.else.26
+  %call31 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.7.94, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.end.32:                                        ; preds = %if.else.15
+  %tobool33 = icmp eq i32 %conv3, 0
+  br i1 %tobool33, label %if.end.37, label %if.then.34
+
+if.then.34:                                       ; preds = %if.end.32
+  %call35 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.8.95, i64 0, i64 0)) #3
+  %sub36 = fsub double -0.000000e+00, %arg
+  store double %sub36, double* %arg.addr, align 8, !tbaa !4
+  br label %if.end.37
+
+if.end.37:                                        ; preds = %if.then.34, %if.end.32
+  %.pr = phi double [ %sub36, %if.then.34 ], [ %arg, %if.end.32 ]
+  %dst.addr.0 = phi i8* [ %call35, %if.then.34 ], [ %dst, %if.end.32 ]
+  %tobool38 = icmp eq i32 %scientific, 0
+  br i1 %tobool38, label %if.else.62, label %while.condthread-pre-split
+
+while.condthread-pre-split:                       ; preds = %if.end.37
+  %cmp40.261 = fcmp olt double %.pr, 1.000000e+00
+  br i1 %cmp40.261, label %while.body, label %while.cond.41thread-pre-split
+
+while.body:                                       ; preds = %while.body, %while.condthread-pre-split
+  %exponent_base_10.0262 = phi i32 [ %dec, %while.body ], [ 0, %while.condthread-pre-split ]
+  %3 = phi double [ %mul, %while.body ], [ %.pr, %while.condthread-pre-split ]
+  %mul = fmul double %3, 1.000000e+01
+  %dec = add nsw i32 %exponent_base_10.0262, -1
+  %cmp40 = fcmp olt double %mul, 1.000000e+00
+  br i1 %cmp40, label %while.body, label %while.cond.while.cond.41thread-pre-split_crit_edge
+
+while.cond.while.cond.41thread-pre-split_crit_edge: ; preds = %while.body
+  store double %mul, double* %arg.addr, align 8, !tbaa !4
+  br label %while.cond.41thread-pre-split
+
+while.cond.41thread-pre-split:                    ; preds = %while.cond.while.cond.41thread-pre-split_crit_edge, %while.condthread-pre-split
+  %.pr246 = phi double [ %mul, %while.cond.while.cond.41thread-pre-split_crit_edge ], [ %.pr, %while.condthread-pre-split ]
+  %exponent_base_10.0.lcssa = phi i32 [ %dec, %while.cond.while.cond.41thread-pre-split_crit_edge ], [ 0, %while.condthread-pre-split ]
+  %cmp42.257 = fcmp ult double %.pr246, 1.000000e+01
+  br i1 %cmp42.257, label %while.end.44, label %while.body.43
+
+while.body.43:                                    ; preds = %while.body.43, %while.cond.41thread-pre-split
+  %exponent_base_10.1258 = phi i32 [ %inc, %while.body.43 ], [ %exponent_base_10.0.lcssa, %while.cond.41thread-pre-split ]
+  %4 = phi double [ %div, %while.body.43 ], [ %.pr246, %while.cond.41thread-pre-split ]
+  %div = fdiv double %4, 1.000000e+01
+  %inc = add nsw i32 %exponent_base_10.1258, 1
+  %cmp42 = fcmp ult double %div, 1.000000e+01
+  br i1 %cmp42, label %while.cond.41.while.end.44_crit_edge, label %while.body.43
+
+while.cond.41.while.end.44_crit_edge:             ; preds = %while.body.43
+  store double %div, double* %arg.addr, align 8, !tbaa !4
+  br label %while.end.44
+
+while.end.44:                                     ; preds = %while.cond.41.while.end.44_crit_edge, %while.cond.41thread-pre-split
+  %exponent_base_10.1.lcssa = phi i32 [ %inc, %while.cond.41.while.end.44_crit_edge ], [ %exponent_base_10.0.lcssa, %while.cond.41thread-pre-split ]
+  %.lcssa = phi double [ %div, %while.cond.41.while.end.44_crit_edge ], [ %.pr246, %while.cond.41thread-pre-split ]
+  %mul45 = fmul double %.lcssa, 1.000000e+06
+  %add = fadd double %mul45, 5.000000e-01
+  %conv46 = fptoui double %add to i64
+  %div47 = udiv i64 %conv46, 1000000
+  %5 = mul i64 %div47, -1000000
+  %sub49 = add i64 %conv46, %5
+  %call50 = call i8* @halide_int64_to_string(i8* %dst.addr.0, i8* %end, i64 %div47, i32 1) #3
+  %call51 = call i8* @halide_string_to_string(i8* %call50, i8* %end, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.9.96, i64 0, i64 0)) #3
+  %call52 = call i8* @halide_int64_to_string(i8* %call51, i8* %end, i64 %sub49, i32 6) #3
+  %cmp53 = icmp sgt i32 %exponent_base_10.1.lcssa, -1
+  br i1 %cmp53, label %if.then.54, label %if.else.56
+
+if.then.54:                                       ; preds = %while.end.44
+  %call55 = call i8* @halide_string_to_string(i8* %call52, i8* %end, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.10.97, i64 0, i64 0)) #3
+  br label %if.end.59
+
+if.else.56:                                       ; preds = %while.end.44
+  %call57 = call i8* @halide_string_to_string(i8* %call52, i8* %end, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.11.98, i64 0, i64 0)) #3
+  %sub58 = sub nsw i32 0, %exponent_base_10.1.lcssa
+  br label %if.end.59
+
+if.end.59:                                        ; preds = %if.else.56, %if.then.54
+  %exponent_base_10.2 = phi i32 [ %exponent_base_10.1.lcssa, %if.then.54 ], [ %sub58, %if.else.56 ]
+  %dst.addr.1 = phi i8* [ %call55, %if.then.54 ], [ %call57, %if.else.56 ]
+  %conv60 = sext i32 %exponent_base_10.2 to i64
+  %call61 = call i8* @halide_int64_to_string(i8* %dst.addr.1, i8* %end, i64 %conv60, i32 2) #3
+  br label %cleanup.148
+
+if.else.62:                                       ; preds = %if.end.37
+  br i1 %cmp16, label %if.then.64, label %if.end.66
+
+if.then.64:                                       ; preds = %if.else.62
+  %call65 = call i8* @halide_double_to_string(i8* %dst.addr.0, i8* %end, double 0.000000e+00, i32 0) #3
+  br label %cleanup.148
+
+if.end.66:                                        ; preds = %if.else.62
+  %add68 = or i64 %and, 4503599627370496
+  %sub70 = add nsw i32 %conv, -1075
+  %cmp71 = icmp ult i32 %conv, 1075
+  br i1 %cmp71, label %if.then.72, label %if.end.105
+
+if.then.72:                                       ; preds = %if.end.66
+  %cmp73 = icmp slt i32 %sub70, -52
+  br i1 %cmp73, label %if.end.84, label %if.else.76
+
+if.else.76:                                       ; preds = %if.then.72
+  %sub77 = sub nsw i32 1075, %conv
+  %sh_prom = zext i32 %sub77 to i64
+  %shr78 = lshr i64 %add68, %sh_prom
+  %shl81 = shl i64 %shr78, %sh_prom
+  %sub82 = sub i64 %add68, %shl81
+  br label %if.end.84
+
+if.end.84:                                        ; preds = %if.else.76, %if.then.72
+  %integer_part.0 = phi i64 [ %shr78, %if.else.76 ], [ 0, %if.then.72 ]
+  %f.0.in = phi i64 [ %sub82, %if.else.76 ], [ %add68, %if.then.72 ]
+  %f.0 = uitofp i64 %f.0.in to double
+  %conv85.244 = zext i32 %sub70 to i64
+  %shl86 = shl i64 %conv85.244, 52
+  %add88 = add i64 %shl86, 4696837146684686336
+  %6 = bitcast i64 %add88 to double
+  %mul90 = fmul double %6, %f.0
+  %add91 = fadd double %mul90, 5.000000e-01
+  %conv92 = fptoui double %add91 to i64
+  %conv93 = uitofp i64 %conv92 to double
+  %and96 = and i64 %conv92, 1
+  %notlhs = fcmp oeq double %conv93, %add91
+  %notrhs = icmp ne i64 %and96, 0
+  %not.or.cond245 = and i1 %notrhs, %notlhs
+  %dec99 = sext i1 %not.or.cond245 to i64
+  %fractional_part.0 = add i64 %dec99, %conv92
+  %cmp101 = icmp eq i64 %fractional_part.0, 1000000
+  %inc103 = zext i1 %cmp101 to i64
+  %inc103.integer_part.0 = add i64 %inc103, %integer_part.0
+  %.fractional_part.0 = select i1 %cmp101, i64 0, i64 %fractional_part.0
+  br label %if.end.105
+
+if.end.105:                                       ; preds = %if.end.84, %if.end.66
+  %integer_part.2 = phi i64 [ %inc103.integer_part.0, %if.end.84 ], [ %add68, %if.end.66 ]
+  %integer_exponent.0 = phi i32 [ 0, %if.end.84 ], [ %sub70, %if.end.66 ]
+  %fractional_part.2 = phi i64 [ %.fractional_part.0, %if.end.84 ], [ 0, %if.end.66 ]
+  %7 = bitcast [512 x i8]* %buf to i8*
+  call void @llvm.lifetime.start(i64 512, i8* %7) #0
+  %add.ptr = getelementptr inbounds [512 x i8], [512 x i8]* %buf, i64 0, i64 512
+  %add.ptr106 = getelementptr inbounds [512 x i8], [512 x i8]* %buf, i64 0, i64 480
+  %call109 = call i8* @halide_int64_to_string(i8* %add.ptr106, i8* %add.ptr, i64 %integer_part.2, i32 1) #3
+  %cmp110.252 = icmp sgt i32 %integer_exponent.0, 0
+  br i1 %cmp110.252, label %for.cond.112.preheader, label %for.cond.cleanup
+
+for.cond.112.preheader:                           ; preds = %if.end.138, %if.end.105
+  %i.0255 = phi i32 [ %inc140, %if.end.138 ], [ 0, %if.end.105 ]
+  %int_part_ptr.0253 = phi i8* [ %int_part_ptr.1, %if.end.138 ], [ %add.ptr106, %if.end.105 ]
+  %int_part_ptr.02534 = ptrtoint i8* %int_part_ptr.0253 to i64
+  %cmp114.249 = icmp eq i8* %call109, %int_part_ptr.0253
+  br i1 %cmp114.249, label %if.end.138, label %for.body.116.preheader
+
+for.body.116.preheader:                           ; preds = %for.cond.112.preheader
+  %8 = sub i64 0, %int_part_ptr.02534
+  %scevgep5 = getelementptr i8, i8* %call109, i64 %8
+  %scevgep56 = ptrtoint i8* %scevgep5 to i64
+  call void @llvm.ppc.mtctr.i64(i64 %scevgep56)
+  br label %for.body.116
+
+for.cond.cleanup:                                 ; preds = %if.end.138, %if.end.105
+  %int_part_ptr.0.lcssa = phi i8* [ %add.ptr106, %if.end.105 ], [ %int_part_ptr.1, %if.end.138 ]
+  %9 = bitcast [512 x i8]* %buf to i8*
+  %call142 = call i8* @halide_string_to_string(i8* %dst.addr.0, i8* %end, i8* %int_part_ptr.0.lcssa) #3
+  %call143 = call i8* @halide_string_to_string(i8* %call142, i8* %end, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.9.96, i64 0, i64 0)) #3
+  %call144 = call i8* @halide_int64_to_string(i8* %call143, i8* %end, i64 %fractional_part.2, i32 6) #3
+  call void @llvm.lifetime.end(i64 512, i8* %9) #0
+  br label %cleanup.148
+
+for.cond.cleanup.115:                             ; preds = %for.body.116
+  br i1 %cmp125, label %if.then.136, label %if.end.138
+
+for.body.116:                                     ; preds = %for.body.116, %for.body.116.preheader
+  %call109.pn = phi i8* [ %p.0251, %for.body.116 ], [ %call109, %for.body.116.preheader ]
+  %carry.0250 = phi i32 [ %carry.1, %for.body.116 ], [ 0, %for.body.116.preheader ]
+  %call109.pn2 = ptrtoint i8* %call109.pn to i64
+  %p.0251 = getelementptr inbounds i8, i8* %call109.pn, i64 -1
+  %scevgep3 = getelementptr i8, i8* inttoptr (i64 -1 to i8*), i64 %call109.pn2
+  %10 = load i8, i8* %scevgep3, align 1, !tbaa !10
+  %sub118 = add i8 %10, -48
+  %conv120 = sext i8 %sub118 to i32
+  %mul121 = shl nsw i32 %conv120, 1
+  %add122 = or i32 %mul121, %carry.0250
+  %11 = trunc i32 %add122 to i8
+  %cmp125 = icmp sgt i8 %11, 9
+  %sub128 = add nsw i32 %add122, 246
+  %carry.1 = zext i1 %cmp125 to i32
+  %new_digit.0.in = select i1 %cmp125, i32 %sub128, i32 %add122
+  %add133 = add nsw i32 %new_digit.0.in, 48
+  %conv134 = trunc i32 %add133 to i8
+  %scevgep = getelementptr i8, i8* inttoptr (i64 -1 to i8*), i64 %call109.pn2
+  store i8 %conv134, i8* %scevgep, align 1, !tbaa !10
+  %12 = call i1 @llvm.ppc.is.decremented.ctr.nonzero()
+  br i1 %12, label %for.body.116, label %for.cond.cleanup.115
+
+if.then.136:                                      ; preds = %for.cond.cleanup.115
+  %incdec.ptr137 = getelementptr inbounds i8, i8* %int_part_ptr.0253, i64 -1
+  store i8 49, i8* %incdec.ptr137, align 1, !tbaa !10
+  br label %if.end.138
+
+if.end.138:                                       ; preds = %if.then.136, %for.cond.cleanup.115, %for.cond.112.preheader
+  %int_part_ptr.1 = phi i8* [ %incdec.ptr137, %if.then.136 ], [ %call109, %for.cond.112.preheader ], [ %int_part_ptr.0253, %for.cond.cleanup.115 ]
+  %inc140 = add nuw nsw i32 %i.0255, 1
+  %exitcond = icmp eq i32 %inc140, %integer_exponent.0
+  br i1 %exitcond, label %for.cond.cleanup, label %for.cond.112.preheader
+
+cleanup.148:                                      ; preds = %for.cond.cleanup, %if.then.64, %if.end.59, %if.else.30, %if.then.28, %if.else.24, %if.then.22, %if.else.13, %if.then.11, %if.else, %if.then.6
+  %retval.1 = phi i8* [ %call7, %if.then.6 ], [ %call8, %if.else ], [ %call12, %if.then.11 ], [ %call14, %if.else.13 ], [ %call23, %if.then.22 ], [ %call25, %if.else.24 ], [ %call29, %if.then.28 ], [ %call31, %if.else.30 ], [ %call65, %if.then.64 ], [ %call61, %if.end.59 ], [ %call144, %for.cond.cleanup ]
+  %13 = bitcast i64* %bits to i8*
+  call void @llvm.lifetime.end(i64 8, i8* %13) #0
+  ret i8* %retval.1
+}
+
+; Function Attrs: nounwind
+declare i8* @memcpy(i8*, i8* nocapture readonly, i64) #1
+
+; Function Attrs: nounwind
+declare void @llvm.ppc.mtctr.i64(i64) #0
+
+; Function Attrs: nounwind
+declare i1 @llvm.ppc.is.decremented.ctr.nonzero() #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind }
+
+!llvm.ident = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0}
+!llvm.module.flags = !{!1, !2, !3}
+
+!0 = !{!"clang version 3.7.0 (branches/release_37 246867) (llvm/branches/release_37 246866)"}
+!1 = !{i32 2, !"halide_use_soft_float_abi", i32 0}
+!2 = !{i32 2, !"halide_mcpu", !"pwr8"}
+!3 = !{i32 2, !"halide_mattrs", !"+altivec,+vsx,+power8-altivec,+direct-move"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"double", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"long long", !6, i64 0}
+!10 = !{!6, !6, i64 0}
--- a/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir
+++ b/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir
@ -0,0 +1,92 @@
+# RUN: llc -start-after=dead-mi-elimination -stop-after=twoaddressinstruction -o /dev/null %s | FileCheck %s
+
+--- |
+  target datalayout = "E-m:e-i64:64-n32:64"
+  target triple = "powerpc64-unknown-linux-gnu"
+  
+  @d = global i32 15, align 4
+  @b = global i32* @d, align 8
+  @a = common global i32 0, align 4
+  
+  ; Function Attrs: nounwind
+  define signext i32 @main() #0 {
+  entry:
+    %0 = load i32*, i32** @b, align 8
+    %1 = load i32, i32* @a, align 4
+    %lnot = icmp eq i32 %1, 0
+    %lnot.ext = zext i1 %lnot to i32
+    %shr.i = lshr i32 2072, %lnot.ext
+    %call.lobit = lshr i32 %shr.i, 7
+    %2 = and i32 %call.lobit, 1
+    %3 = load i32, i32* %0, align 4
+    %or = or i32 %2, %3
+    store i32 %or, i32* %0, align 4
+    %4 = load i32, i32* @a, align 4
+    %lnot.1 = icmp eq i32 %4, 0
+    %lnot.ext.1 = zext i1 %lnot.1 to i32
+    %shr.i.1 = lshr i32 2072, %lnot.ext.1
+    %call.lobit.1 = lshr i32 %shr.i.1, 7
+    %5 = and i32 %call.lobit.1, 1
+    %or.1 = or i32 %5, %or
+    store i32 %or.1, i32* %0, align 4
+    ret i32 %or.1
+  }
+  
+  attributes #0 = { nounwind "target-cpu"="ppc64" }
+
+...
+---
+name:            main
+alignment:       2
+exposesReturnsTwice: false
+hasInlineAsm:    false
+isSSA:           true
+tracksRegLiveness: true
+tracksSubRegLiveness: false
+registers:       
+  - { id: 0, class: g8rc_and_g8rc_nox0 }
+  - { id: 1, class: g8rc_and_g8rc_nox0 }
+  - { id: 2, class: gprc }
+  - { id: 3, class: gprc }
+  - { id: 4, class: gprc }
+  - { id: 5, class: g8rc_and_g8rc_nox0 }
+  - { id: 6, class: g8rc_and_g8rc_nox0 }
+  - { id: 7, class: gprc }
+  - { id: 8, class: gprc }
+  - { id: 9, class: gprc }
+  - { id: 10, class: g8rc }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0.entry:
+    liveins: %x2
+
+    %0 = ADDIStocHA %x2, @b
+    %1 = LD target-flags(ppc-toc-lo) @b, killed %0 :: (load 8 from @b)
+    %2 = LWZ 0, %1 :: (load 4 from %ir.0)
+    %3 = LI 0
+    %4 = RLWIMI %3, killed %2, 0, 0, 31
+    ; CHECK-LABEL: name: main
+    ; CHECK: %[[REG1:[0-9]+]] = LI 0
+    ; CHECK: %[[REG2:[0-9]+]] = COPY %[[REG1]]
+    ; CHECK: %[[REG2]] = RLWIMI %[[REG2]], killed %2, 0, 0, 31
+    %8 = RLWIMI %3, %4, 0, 0, 31
+    STW %4, 0, %1 :: (store 4 into %ir.0)
+    %10 = EXTSW_32_64 %8
+    STW %8, 0, %1 :: (store 4 into %ir.0)
+    %x3 = COPY %10
+    BLR8 implicit %x3, implicit %lr8, implicit %rm
+
+...
--- a/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
+++ b/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
@ -0,0 +1,28 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define <2 x i32> @test1(<4 x i32> %wide.vec) #0 {
+entry:
+  %strided.vec = shufflevector <4 x i32> %wide.vec, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %strided.vec
+
+; CHECK-LABEL: @test1
+; CHECK: vsldoi 2, 2, 2, 12
+; CHECK: blr
+}
+
+; Function Attrs: nounwind
+define <16 x i8> @test2(<16 x i8> %wide.vec) #0 {
+entry:
+  %strided.vec = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 11>
+  ret <16 x i8> %strided.vec
+
+; CHECK-LABEL: @test2
+; CHECK: vsldoi 2, 2, 2, 12
+; CHECK: blr
+}
+
+attributes #0 = { nounwind "target-cpu"="pwr7" }
+
--- a/test/CodeGen/PowerPC/pr24546.ll
+++ b/test/CodeGen/PowerPC/pr24546.ll
@ -0,0 +1,116 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s
+
+; Verify that we no longer crash in VSX swap removal when debug values
+; are in the code stream.
+
+@php_intpow10.powers = external unnamed_addr constant [23 x double], align 8
+
+; Function Attrs: nounwind
+define double @_php_math_round(double %value, i32 signext %places, i32 signext %mode) #0 {
+entry:
+  br i1 undef, label %if.then, label %if.else, !dbg !32
+
+if.then:                                          ; preds = %entry
+  %conv = sitofp i32 undef to double, !dbg !34
+  br i1 undef, label %if.then.i, label %if.end.i, !dbg !36
+
+if.then.i:                                        ; preds = %if.then
+  %call.i = tail call double @pow(double 1.000000e+01, double undef) #3, !dbg !39
+  br label %php_intpow10.exit, !dbg !41
+
+if.end.i:                                         ; preds = %if.then
+  %0 = load double, double* undef, align 8, !dbg !42, !tbaa !43
+  br label %php_intpow10.exit, !dbg !47
+
+php_intpow10.exit:                                ; preds = %if.end.i, %if.then.i
+  %retval.0.i = phi double [ %call.i, %if.then.i ], [ %0, %if.end.i ], !dbg !48
+  tail call void @llvm.dbg.value(metadata double %retval.0.i, i64 0, metadata !15, metadata !49), !dbg !50
+  %div = fdiv double %conv, %retval.0.i, !dbg !51
+  br label %if.end.15, !dbg !52
+
+if.else:                                          ; preds = %entry
+  %mul = fmul double %value, undef, !dbg !53
+  br label %if.end.15
+
+if.end.15:                                        ; preds = %if.else, %php_intpow10.exit
+  %tmp_value.1 = phi double [ %div, %php_intpow10.exit ], [ %mul, %if.else ]
+  ret double %tmp_value.1, !dbg !57
+}
+
+declare signext i32 @php_intlog10abs(...) #1
+
+declare signext i32 @php_round_helper(...) #1
+
+; Function Attrs: nounwind
+declare double @pow(double, double) #0
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!29, !30}
+!llvm.ident = !{!31}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (git://github.com/llvm-mirror/clang.git e0848b6353721eb1b278a5bbea257bbf6316251e) (git://github.com/llvm-mirror/llvm.git 8724a428dfd5e78d7865bb01783708e83f9ed128)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !5, globals: !23)
+!1 = !DIFile(filename: "testcase.i", directory: "/tmp/glibc.build")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIBasicType(name: "double", size: 64, align: 64, encoding: DW_ATE_float)
+!5 = !{!6, !18}
+!6 = !DISubprogram(name: "_php_math_round", scope: !1, file: !1, line: 15, type: !7, isLocal: false, isDefinition: true, scopeLine: 16, flags: DIFlagPrototyped, isOptimized: true, function: double (double, i32, i32)* @_php_math_round, variables: !10)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!4, !4, !9, !9}
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !{!11, !12, !13, !14, !15, !16, !17}
+!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", arg: 1, scope: !6, file: !1, line: 15, type: !4)
+!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "places", arg: 2, scope: !6, file: !1, line: 15, type: !9)
+!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "mode", arg: 3, scope: !6, file: !1, line: 15, type: !9)
+!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "f1", scope: !6, file: !1, line: 17, type: !4)
+!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "f2", scope: !6, file: !1, line: 17, type: !4)
+!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "tmp_value", scope: !6, file: !1, line: 18, type: !4)
+!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "precision_places", scope: !6, file: !1, line: 19, type: !9)
+!18 = !DISubprogram(name: "php_intpow10", scope: !1, file: !1, line: 1, type: !19, isLocal: true, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!4, !9}
+!21 = !{!22}
+!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "power", arg: 1, scope: !18, file: !1, line: 1, type: !9)
+!23 = !{!24}
+!24 = !DIGlobalVariable(name: "powers", scope: !18, file: !1, line: 3, type: !25, isLocal: true, isDefinition: true, variable: [23 x double]* @php_intpow10.powers)
+!25 = !DICompositeType(tag: DW_TAG_array_type, baseType: !26, size: 1472, align: 64, elements: !27)
+!26 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !4)
+!27 = !{!28}
+!28 = !DISubrange(count: 23)
+!29 = !{i32 2, !"Dwarf Version", i32 4}
+!30 = !{i32 2, !"Debug Info Version", i32 3}
+!31 = !{!"clang version 3.8.0 (git://github.com/llvm-mirror/clang.git e0848b6353721eb1b278a5bbea257bbf6316251e) (git://github.com/llvm-mirror/llvm.git 8724a428dfd5e78d7865bb01783708e83f9ed128)"}
+!32 = !DILocation(line: 21, column: 32, scope: !33)
+!33 = distinct !DILexicalBlock(scope: !6, file: !1, line: 21, column: 6)
+!34 = !DILocation(line: 22, column: 15, scope: !35)
+!35 = distinct !DILexicalBlock(scope: !33, file: !1, line: 21, column: 67)
+!36 = !DILocation(line: 8, column: 16, scope: !37, inlinedAt: !38)
+!37 = distinct !DILexicalBlock(scope: !18, file: !1, line: 8, column: 6)
+!38 = distinct !DILocation(line: 23, column: 8, scope: !35)
+!39 = !DILocation(line: 9, column: 10, scope: !40, inlinedAt: !38)
+!40 = distinct !DILexicalBlock(scope: !37, file: !1, line: 8, column: 31)
+!41 = !DILocation(line: 9, column: 3, scope: !40, inlinedAt: !38)
+!42 = !DILocation(line: 11, column: 9, scope: !18, inlinedAt: !38)
+!43 = !{!44, !44, i64 0}
+!44 = !{!"double", !45, i64 0}
+!45 = !{!"omnipotent char", !46, i64 0}
+!46 = !{!"Simple C/C++ TBAA"}
+!47 = !DILocation(line: 11, column: 2, scope: !18, inlinedAt: !38)
+!48 = !DILocation(line: 23, column: 8, scope: !35)
+!49 = !DIExpression()
+!50 = !DILocation(line: 17, column: 13, scope: !6)
+!51 = !DILocation(line: 24, column: 25, scope: !35)
+!52 = !DILocation(line: 25, column: 2, scope: !35)
+!53 = !DILocation(line: 27, column: 22, scope: !54)
+!54 = distinct !DILexicalBlock(scope: !55, file: !1, line: 26, column: 20)
+!55 = distinct !DILexicalBlock(scope: !56, file: !1, line: 26, column: 7)
+!56 = distinct !DILexicalBlock(scope: !33, file: !1, line: 25, column: 9)
+!57 = !DILocation(line: 32, column: 2, scope: !6)
--- a/test/CodeGen/PowerPC/pr25157.ll
+++ b/test/CodeGen/PowerPC/pr25157.ll
@ -0,0 +1,58 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; Verify correct generation of an lxsspx rather than an invalid optimization
+; to lxvdsx.  Bugpoint-reduced test from Eric Schweitz.
+
+%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625 = type <{ [28 x i8] }>
+%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626 = type <{ [64 x i8] }>
+
+@.BSS38 = external global %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, align 32
+@_main1_2_ = external global %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, section ".comm", align 16
+
+define void @aercalc_() {
+L.entry:
+  br i1 undef, label %L.LB38_2426, label %L.LB38_2911
+
+L.LB38_2911:
+  br i1 undef, label %L.LB38_2140, label %L.LB38_2640
+
+L.LB38_2640:
+  unreachable
+
+L.LB38_2426:
+  br i1 undef, label %L.LB38_2438, label %L.LB38_2920
+
+L.LB38_2920:
+  br i1 undef, label %L.LB38_2438, label %L.LB38_2921
+
+L.LB38_2921:
+  br label %L.LB38_2140
+
+L.LB38_2140:
+  ret void
+
+L.LB38_2438:
+  br i1 undef, label %L.LB38_2451, label %L.LB38_2935
+
+L.LB38_2935:
+  br i1 undef, label %L.LB38_2451, label %L.LB38_2936
+
+L.LB38_2936:
+  unreachable
+
+L.LB38_2451:
+  br i1 undef, label %L.LB38_2452, label %L.LB38_2937
+
+L.LB38_2937:
+  unreachable
+
+L.LB38_2452:
+  %0 = load float, float* bitcast (i8* getelementptr inbounds (%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625* @.BSS38, i64 0, i32 0, i64 16) to float*), align 16
+  %1 = fpext float %0 to double
+  %2 = insertelement <2 x double> undef, double %1, i32 1
+  store <2 x double> %2, <2 x double>* bitcast (i8* getelementptr inbounds (%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626* @_main1_2_, i64 0, i32 0, i64 32) to <2 x double>*), align 16
+  unreachable
+}
+
+; CHECK-LABEL: @aercalc_
+; CHECK: lxsspx
--- a/test/CodeGen/PowerPC/rlwimi-and-or-bits.ll
+++ b/test/CodeGen/PowerPC/rlwimi-and-or-bits.ll
@ -0,0 +1,27 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@m = external global i32, align 4
+
+; Function Attrs: nounwind
+define signext i32 @main() #0 {
+entry:
+
+; CHECK-LABEL: @main
+; CHECK-NOT: rlwimi
+; CHECK: andi
+
+  %0 = load i32, i32* @m, align 4
+  %or = or i32 %0, 250
+  store i32 %or, i32* @m, align 4
+  %and = and i32 %or, 249
+  %sub.i = sub i32 %and, 0
+  %sext = shl i32 %sub.i, 24
+  %conv = ashr exact i32 %sext, 24
+  ret i32 %conv
+}
+
+attributes #0 = { nounwind "target-cpu"="pwr7" }
+attributes #1 = { nounwind }
+
--- a/test/CodeGen/PowerPC/select-i1-vs-i1.ll
+++ b/test/CodeGen/PowerPC/select-i1-vs-i1.ll
--- a/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
+++ b/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
@ -0,0 +1,38 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @LSH_recall_init(float %d_min, float %W) #0 {
+entry:
+  br i1 undef, label %for.body.lr.ph, label %for.end
+
+; CHECK-LABEL: @LSH_recall_init
+; CHECK: xsnmsubadp
+
+for.body.lr.ph:                                   ; preds = %entry
+  %conv3 = fpext float %W to double
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %div = fdiv fast float 0.000000e+00, 0.000000e+00
+  %add = fadd fast float %div, %d_min
+  %conv2 = fpext float %add to double
+  %0 = tail call double @llvm.sqrt.f64(double %conv2)
+  %div4 = fdiv fast double %conv3, %0
+  %call = tail call signext i32 bitcast (i32 (...)* @p_col_helper to i32 (double)*)(double %div4) #2
+  br label %for.body
+
+for.end:                                          ; preds = %entry
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @llvm.sqrt.f64(double) #1
+
+declare signext i32 @p_col_helper(...) #2
+
+attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="pwr7" "unsafe-fp-math"="true" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
--- a/test/CodeGen/PowerPC/vsx-fma-mutate-undef.ll
+++ b/test/CodeGen/PowerPC/vsx-fma-mutate-undef.ll
@ -0,0 +1,33 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @acosh_float8() #0 {
+entry:
+  br i1 undef, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %0 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> <float 0x3FE62E4200000000, float 0x3FE62E4200000000, float 0x3FE62E4200000000, float 0x3FE62E4200000000>, <4 x float> undef) #0
+  %astype.i.i.74.i = bitcast <4 x float> %0 to <4 x i32>
+  %and.i.i.76.i = and <4 x i32> %astype.i.i.74.i, undef
+  %or.i.i.79.i = or <4 x i32> %and.i.i.76.i, undef
+  %astype5.i.i.80.i = bitcast <4 x i32> %or.i.i.79.i to <4 x float>
+  %1 = shufflevector <4 x float> %astype5.i.i.80.i, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = shufflevector <8 x float> undef, <8 x float> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  store <8 x float> %2, <8 x float>* undef, align 32
+  br label %if.end
+
+; CHECK-LABEL: @acosh_float8
+; CHECK: xvmaddasp
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+
--- a/test/CodeGen/X86/pr24374.ll
+++ b/test/CodeGen/X86/pr24374.ll
@ -0,0 +1,37 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-w64-windows-gnu"
+
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @g, i8* null }]
+
+declare i32 @__gxx_personality_seh0(...)
+
+; Function Attrs: nounwind
+define void @f() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_seh0 to i8*) {
+entry:
+  invoke void @g()
+          to label %exit unwind label %lpad
+
+lpad:                                             ; preds = %entry
+  landingpad { i8*, i32 }
+          cleanup
+  unreachable
+
+exit:                                             ; preds = %entry
+  unreachable
+}
+; CHECK-LABEL: f:
+; CHECK:       .seh_proc f
+; CHECK:               .seh_handler __gxx_personality_seh0, @unwind, @except
+; CHECK:       callq g
+; CHECK:               .seh_handlerdata
+; CHECK:               .seh_endproc
+
+define void @g() {
+  unreachable
+}
+; CHECK-LABEL: g:
+; CHECK:       .seh_proc g
+; CHECK:       .seh_endproc
+
+attributes #0 = { nounwind }
--- a/test/CodeGen/X86/setcc-lowering.ll
+++ b/test/CodeGen/X86/setcc-lowering.ll
@ -0,0 +1,29 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s
+
+; Verify that we don't crash during codegen due to a wrong lowering
+; of a setcc node with illegal operand types and return type.
+
+define <8 x i16> @pr25080(<8 x i32> %a) {
+; CHECK-LABEL: pr25080:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; CHECK-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
+; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
+; CHECK-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT:    vpsraw $15, %xmm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %0 = trunc <8 x i32> %a to <8 x i23>
+  %1 = icmp eq <8 x i23> %0, zeroinitializer
+  %2 = or <8 x i1> %1, <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
+  %3 = sext <8 x i1> %2 to <8 x i16>
+  ret <8 x i16> %3
+}
--- a/test/DebugInfo/gvn.ll
+++ b/test/DebugInfo/gvn.ll
@ -0,0 +1,135 @@
+; RUN: opt < %s -O2 -gvn -S | FileCheck %s
+;
+; Produced at -O2 from:
+; struct context {
+;   int cur_pid
+; };
+; int a, b, c, f, d;
+; int pid_for_task(int);
+; sample(struct context *p1)
+; {
+;   if (c)
+;     b = a;
+;   if (a && p1->cur_pid)
+;     sample_internal();
+; }
+; callback() {
+;   f = pid_for_task(d);
+;   sample(&f);
+; }
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+%struct.context = type { i32 }
+
+@c = common global i32 0, align 4
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@d = common global i32 0, align 4
+@f = common global i32 0, align 4
+
+; Function Attrs: nounwind
+declare i32 @sample_internal(...)
+
+; Function Attrs: nounwind
+define i32 @callback() #0 {
+entry:
+  %0 = load i32, i32* @d, align 4, !dbg !37
+
+  ; Verify that the call still has a debug location after GVN.
+  ; CHECK: %call = tail call i32 @pid_for_task(i32 %0) #{{[0-9]}}, !dbg
+  %call = tail call i32 @pid_for_task(i32 %0) #3, !dbg !37
+
+  store i32 %call, i32* @f, align 4, !dbg !37
+  tail call void @llvm.dbg.value(metadata %struct.context* bitcast (i32* @f to %struct.context*), i64 0, metadata !25, metadata !26) #3, !dbg !38
+  %1 = load i32, i32* @c, align 4, !dbg !40
+  %tobool.i = icmp eq i32 %1, 0, !dbg !40
+  %.pr.i = load i32, i32* @a, align 4, !dbg !41
+  br i1 %tobool.i, label %if.end.i, label %if.then.i, !dbg !42
+
+if.then.i:                                        ; preds = %entry
+  store i32 %.pr.i, i32* @b, align 4, !dbg !43
+  br label %if.end.i, !dbg !43
+
+if.end.i:                                         ; preds = %if.then.i, %entry
+  %tobool1.i = icmp eq i32 %.pr.i, 0, !dbg !41
+
+  ; This instruction has no debug location -- in this
+  ; particular case it was removed by a bug in SimplifyCFG.
+  %2 = load i32, i32* @f, align 4
+
+  ; GVN is supposed to replace the load of @f with a direct reference to %call.
+  ; CHECK: %tobool2.i = icmp eq i32 %call, 0, !dbg
+  %tobool2.i = icmp eq i32 %2, 0, !dbg !41
+
+  %or.cond = or i1 %tobool1.i, %tobool2.i, !dbg !41
+  br i1 %or.cond, label %sample.exit, label %if.then.3.i, !dbg !41
+
+if.then.3.i:                                      ; preds = %if.end.i
+  %call.i = tail call i32 bitcast (i32 (...)* @sample_internal to i32 ()*)() #3, !dbg !44
+  br label %sample.exit, !dbg !44
+
+sample.exit:                                      ; preds = %if.end.i, %if.then.3.i
+  ret i32 undef, !dbg !45
+}
+
+declare i32 @pid_for_task(i32) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22, !23}
+!llvm.ident = !{!24}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 244473) (llvm/trunk 244644)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3, globals: !16)
+!1 = !DIFile(filename: "test.c", directory: "/")
+!2 = !{}
+!3 = !{!4, !13}
+!4 = !DISubprogram(name: "sample", scope: !5, file: !5, line: 6, type: !6, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DIFile(filename: "test.i", directory: "/")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8, !9}
+!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 64)
+!10 = !DICompositeType(tag: DW_TAG_structure_type, name: "context", file: !5, line: 1, size: 32, align: 32, elements: !11)
+!11 = !{!12}
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "cur_pid", scope: !10, file: !5, line: 2, baseType: !8, size: 32, align: 32)
+!13 = !DISubprogram(name: "callback", scope: !5, file: !5, line: 13, type: !14, isLocal: false, isDefinition: true, scopeLine: 13, isOptimized: false, function: i32 ()* @callback, variables: !2)
+!14 = !DISubroutineType(types: !15)
+!15 = !{!8}
+!16 = !{!17, !18, !19, !20, !21}
+!17 = !DIGlobalVariable(name: "a", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @a)
+!18 = !DIGlobalVariable(name: "b", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @b)
+!19 = !DIGlobalVariable(name: "c", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @c)
+!20 = !DIGlobalVariable(name: "f", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @f)
+!21 = !DIGlobalVariable(name: "d", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @d)
+!22 = !{i32 2, !"Dwarf Version", i32 2}
+!23 = !{i32 2, !"Debug Info Version", i32 3}
+!24 = !{!"clang version 3.8.0 (trunk 244473) (llvm/trunk 244644)"}
+!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "p1", arg: 1, scope: !4, file: !5, line: 6, type: !9)
+!26 = !DIExpression()
+!27 = !DILocation(line: 6, scope: !4)
+!28 = !DILocation(line: 8, scope: !29)
+!29 = distinct !DILexicalBlock(scope: !4, file: !5, line: 8)
+!30 = !DILocation(line: 10, scope: !31)
+!31 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10)
+!32 = !DILocation(line: 8, scope: !4)
+!33 = !DILocation(line: 9, scope: !29)
+!34 = !DILocation(line: 10, scope: !4)
+!35 = !DILocation(line: 11, scope: !31)
+!36 = !DILocation(line: 12, scope: !4)
+!37 = !DILocation(line: 14, scope: !13)
+!38 = !DILocation(line: 6, scope: !4, inlinedAt: !39)
+!39 = distinct !DILocation(line: 15, scope: !13)
+!40 = !DILocation(line: 8, scope: !29, inlinedAt: !39)
+!41 = !DILocation(line: 10, scope: !31, inlinedAt: !39)
+!42 = !DILocation(line: 8, scope: !4, inlinedAt: !39)
+!43 = !DILocation(line: 9, scope: !29, inlinedAt: !39)
+!44 = !DILocation(line: 11, scope: !31, inlinedAt: !39)
+!45 = !DILocation(line: 16, scope: !13)
--- a/test/LTO/X86/diagnostic-handler-noexit.ll
+++ b/test/LTO/X86/diagnostic-handler-noexit.ll
@ -0,0 +1,13 @@
+; LTO default diagnostic handler should be non-exiting.
+; This test verifies that after addModule() encounters an error, the diagnostic
+; handler does not call exit(1) and instead returns to the caller of addModule.
+
+; RUN: llvm-as <%s >%t1
+; RUN: llvm-as <%s >%t2
+; RUN: not llvm-lto -o /dev/null %t1 %t2 2>&1 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: Linking globals named 'goodboy': symbol multiply defined!
+; CHECK: llvm-lto{{.*}}: error adding file
+@goodboy = global i32 3203383023, align 4    ; 0xbeefbeef
--- a/test/MC/AMDGPU/vop3.s
+++ b/test/MC/AMDGPU/vop3.s
@ -1,5 +1,8 @@
-// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
-// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=SICI
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI
+

 //===----------------------------------------------------------------------===//
 // VOPC Instructions
@ -8,63 +11,81 @@
 // Test forced e64 encoding

 v_cmp_lt_f32_e64 s[2:3], v4, -v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40]
+// SICI: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40]
+// VI:   v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x40]
+

 //
 // Modifier tests:
 //

 v_cmp_lt_f32 s[2:3] -v4, v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x20] 
+// SICI: v_cmp_lt_f32_e64 s[2:3], -v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x20]
+// VI:   v_cmp_lt_f32_e64 s[2:3], -v4, v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x20]

 v_cmp_lt_f32 s[2:3]  v4, -v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40]
+// SICI: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40]
+// VI:   v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x40]

 v_cmp_lt_f32 s[2:3] -v4, -v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x60]
+// SICI: v_cmp_lt_f32_e64 s[2:3], -v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x60]
+// VI:   v_cmp_lt_f32_e64 s[2:3], -v4, -v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x60]

 v_cmp_lt_f32 s[2:3] |v4|, v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_lt_f32_e64 s[2:3], |v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_lt_f32_e64 s[2:3], |v4|, v6 ; encoding: [0x02,0x01,0x41,0xd0,0x04,0x0d,0x02,0x00]

 v_cmp_lt_f32 s[2:3] v4, |v6|
-// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, |v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_lt_f32_e64 s[2:3], v4, |v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_lt_f32_e64 s[2:3], v4, |v6| ; encoding: [0x02,0x02,0x41,0xd0,0x04,0x0d,0x02,0x00]

 v_cmp_lt_f32 s[2:3] |v4|, |v6|
-// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, |v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_lt_f32_e64 s[2:3], |v4|, |v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_lt_f32_e64 s[2:3], |v4|, |v6| ; encoding: [0x02,0x03,0x41,0xd0,0x04,0x0d,0x02,0x00]

 v_cmp_lt_f32 s[2:3] -|v4|, v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x20]
+// SICI: v_cmp_lt_f32_e64 s[2:3], -|v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x20]
+// VI:   v_cmp_lt_f32_e64 s[2:3], -|v4|, v6 ; encoding: [0x02,0x01,0x41,0xd0,0x04,0x0d,0x02,0x20]

 v_cmp_lt_f32 s[2:3] v4, -|v6|
-// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -|v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x40]
+// SICI: v_cmp_lt_f32_e64 s[2:3], v4, -|v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x40]
+// VI:   v_cmp_lt_f32_e64 s[2:3], v4, -|v6| ; encoding: [0x02,0x02,0x41,0xd0,0x04,0x0d,0x02,0x40]

 v_cmp_lt_f32 s[2:3] -|v4|, -|v6|
-// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, -|v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x60]
+// SICI: v_cmp_lt_f32_e64 s[2:3], -|v4|, -|v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x60]
+// VI:   v_cmp_lt_f32_e64 s[2:3], -|v4|, -|v6| ; encoding: [0x02,0x03,0x41,0xd0,0x04,0x0d,0x02,0x60]

 //
 // Instruction tests:
 //

 v_cmp_f_f32 s[2:3], v4, v6
-// CHECK: v_cmp_f_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x00,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_f_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x00,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_f_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x40,0xd0,0x04,0x0d,0x02,0x00]

 v_cmp_lt_f32 s[2:3], v4, v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_lt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_lt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x00]

 v_cmp_eq_f32 s[2:3], v4, v6
-// CHECK: v_cmp_eq_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x04,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_eq_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x04,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_eq_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x42,0xd0,0x04,0x0d,0x02,0x00]

 v_cmp_le_f32 s[2:3], v4, v6
-// CHECK: v_cmp_le_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x06,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_le_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x06,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_le_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x43,0xd0,0x04,0x0d,0x02,0x00]

 v_cmp_gt_f32 s[2:3], v4, v6
-// CHECK: v_cmp_gt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x08,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_gt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x08,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_gt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x44,0xd0,0x04,0x0d,0x02,0x00]

 v_cmp_lg_f32 s[2:3], v4, v6
-// CHECK: v_cmp_lg_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0a,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_lg_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0a,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_lg_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x45,0xd0,0x04,0x0d,0x02,0x00]

 v_cmp_ge_f32 s[2:3], v4, v6
-// CHECK: v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0c,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0c,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x46,0xd0,0x04,0x0d,0x02,0x00]

 // TODO: Finish VOPC

@ -77,22 +98,28 @@ v_cmp_ge_f32 s[2:3], v4, v6
 // 

 v_fract_f32 v1, -v2
-// CHECK: v_fract_f32_e64 v1, -v2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x20]
+// SICI: v_fract_f32_e64 v1, -v2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x20]
+// VI:   v_fract_f32_e64 v1, -v2 ; encoding: [0x01,0x00,0x5b,0xd1,0x02,0x01,0x00,0x20]

 v_fract_f32 v1, |v2|
-// CHECK: v_fract_f32_e64 v1, |v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x00]
+// SICI: v_fract_f32_e64 v1, |v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x00]
+// VI:   v_fract_f32_e64 v1, |v2| ; encoding: [0x01,0x01,0x5b,0xd1,0x02,0x01,0x00,0x00]

 v_fract_f32 v1, -|v2|
-// CHECK: v_fract_f32_e64 v1, -|v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x20]
+// SICI: v_fract_f32_e64 v1, -|v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x20]
+// VI:   v_fract_f32_e64 v1, -|v2| ; encoding: [0x01,0x01,0x5b,0xd1,0x02,0x01,0x00,0x20]

 v_fract_f32 v1, v2 clamp
-// CHECK: v_fract_f32_e64 v1, v2 clamp ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x00]
+// SICI: v_fract_f32_e64 v1, v2 clamp ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x00]
+// VI:   v_fract_f32_e64 v1, v2 clamp ; encoding: [0x01,0x80,0x5b,0xd1,0x02,0x01,0x00,0x00]

 v_fract_f32 v1, v2 mul:2
-// CHECK: v_fract_f32_e64 v1, v2 mul:2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x08]
+// SICI: v_fract_f32_e64 v1, v2 mul:2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x08]
+// VI:   v_fract_f32_e64 v1, v2 mul:2 ; encoding: [0x01,0x00,0x5b,0xd1,0x02,0x01,0x00,0x08]

 v_fract_f32 v1, v2, div:2 clamp
-// CHECK: v_fract_f32_e64 v1, v2 clamp div:2 ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x18]
+// SICI: v_fract_f32_e64 v1, v2 clamp div:2 ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x18]
+// VI:   v_fract_f32_e64 v1, v2 clamp div:2 ; encoding: [0x01,0x80,0x5b,0xd1,0x02,0x01,0x00,0x18]

 // TODO: Finish VOP1

@ -102,37 +129,47 @@ v_fract_f32 v1, v2, div:2 clamp

 // Test forced e64 encoding with e32 operands

-v_ldexp_f32_e64 v1, v3, v5
-// CHECK: v_ldexp_f32_e64 v1, v3, v5 ; encoding: [0x01,0x00,0x56,0xd2,0x03,0x0b,0x02,0x00]
+v_add_f32_e64 v1, v3, v5
+// SICI: v_add_f32_e64 v1, v3, v5 ; encoding: [0x01,0x00,0x06,0xd2,0x03,0x0b,0x02,0x00]
+// VI:   v_add_f32_e64 v1, v3, v5 ; encoding: [0x01,0x00,0x01,0xd1,0x03,0x0b,0x02,0x00]


 // TODO: Modifier tests

 v_cndmask_b32 v1, v3, v5, s[4:5]
-// CHECK: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00]
+// SICI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00]
+// VI:   v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x00]

 //TODO: readlane, writelane

 v_add_f32 v1, v3, s5
-// CHECK: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd2,0x03,0x0b,0x00,0x00]
+// VI:   v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x01,0xd1,0x03,0x0b,0x00,0x00]

 v_sub_f32 v1, v3, s5
-// CHECK: v_sub_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x08,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_sub_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x08,0xd2,0x03,0x0b,0x00,0x00]
+// VI:   v_sub_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x02,0xd1,0x03,0x0b,0x00,0x00]

 v_subrev_f32 v1, v3, s5
-// CHECK: v_subrev_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0a,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_subrev_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0a,0xd2,0x03,0x0b,0x00,0x00]
+// VI:   v_subrev_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x03,0xd1,0x03,0x0b,0x00,0x00]

 v_mac_legacy_f32 v1, v3, s5
-// CHECK: v_mac_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0c,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_mac_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0c,0xd2,0x03,0x0b,0x00,0x00]
+// FIXME: The error message should be: error: instruction not supported on this GPU
+// NOVI: error: invalid operand for instruction

 v_mul_legacy_f32 v1, v3, s5
-// CHECK: v_mul_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0e,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_mul_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0e,0xd2,0x03,0x0b,0x00,0x00]
+// VI:   v_mul_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x04,0xd1,0x03,0x0b,0x00,0x00]

 v_mul_f32 v1, v3, s5
-// CHECK: v_mul_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x10,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_mul_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x10,0xd2,0x03,0x0b,0x00,0x00]
+// VI:   v_mul_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x05,0xd1,0x03,0x0b,0x00,0x00]

 v_mul_i32_i24 v1, v3, s5
-// CHECK: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x12,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x12,0xd2,0x03,0x0b,0x00,0x00]
+// VI:   v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd1,0x03,0x0b,0x00,0x00]

 ///===---------------------------------------------------------------------===//
 // VOP3 Instructions
@ -141,7 +178,8 @@ v_mul_i32_i24 v1, v3, s5
 // TODO: Modifier tests

 v_mad_legacy_f32 v2, v4, v6, v8
-// CHECK: v_mad_legacy_f32 v2, v4, v6, v8 ; encoding: [0x02,0x00,0x80,0xd2,0x04,0x0d,0x22,0x04]
+// SICI: v_mad_legacy_f32 v2, v4, v6, v8 ; encoding: [0x02,0x00,0x80,0xd2,0x04,0x0d,0x22,0x04]
+// VI:   v_mad_legacy_f32 v2, v4, v6, v8 ; encoding: [0x02,0x00,0xc0,0xd1,0x04,0x0d,0x22,0x04]



--- a/test/MC/ARM/directive-arch-semantic-action.s
+++ b/test/MC/ARM/directive-arch-semantic-action.s
@ -0,0 +1,12 @@
+@ RUN: not llvm-mc -triple arm-gnueabi-linux -filetype asm %s 2>&1 | FileCheck %s
+
+        .arch	armv6
+        dsb
+@ CHECK: error: instruction requires: data-barriers
+
+        .arch   armv7
+        dsb
+@ CHECK-NOT: error: instruction requires: data-barriers
+
+        .arch   invalid_architecture_name
+@ CHECK: error: Unknown arch name
--- a/test/MC/Sparc/sparc-directive-xword.s
+++ b/test/MC/Sparc/sparc-directive-xword.s
@ -1,5 +1,6 @@
 ! RUN: not llvm-mc %s -arch=sparc   -show-encoding 2>&1 | FileCheck %s --check-prefix=SPARC32
-! RUN: llvm-mc %s -arch=sparcv9 -show-encoding | FileCheck %s --check-prefix=SPARC64
+! RUN: llvm-mc %s -triple sparc64 -show-encoding | FileCheck %s --check-prefix=SPARC64
+! RUN: llvm-mc %s -triple sparcv9 -show-encoding | FileCheck %s --check-prefix=SPARCV9

        ! SPARC32:       error: unknown directive
        ! SPARC32-NEXT:  .xword 65536
@ -8,3 +9,5 @@
        ! SPARC64:  .xword 65536
        .xword 65536

+        ! SPARCV9:  .xword 65536
+        .xword 65536
--- a/tools/llvm-lto/llvm-lto.cpp
+++ b/tools/llvm-lto/llvm-lto.cpp
@ -214,8 +214,11 @@ int main(int argc, char **argv) {
    if (SetMergedModule && i == BaseArg) {
      // Transfer ownership to the code generator.
      CodeGen.setModule(Module.release());
-    } else if (!CodeGen.addModule(Module.get()))
+    } else if (!CodeGen.addModule(Module.get())) {
+      // Print a message here so that we know addModule() did not abort.
+      errs() << argv[0] << ": error adding file '" << InputFilenames[i] << "'\n";
      return 1;
+    }

    unsigned NumSyms = LTOMod->getSymbolCount();
    for (unsigned I = 0; I < NumSyms; ++I) {
--- a/tools/llvm-shlib/Makefile
+++ b/tools/llvm-shlib/Makefile
@ -61,7 +61,7 @@ endif

 ifeq ($(HOST_OS), $(filter $(HOST_OS), DragonFly Linux FreeBSD GNU/kFreeBSD GNU))
    # Add soname to the library.
-    LLVMLibsOptions += -Wl,--soname,lib$(LIBRARYNAME)$(SHLIBEXT)
+    LLVMLibsOptions += -Wl,--soname,lib$(LIBRARYNAME).1$(SHLIBEXT)
 endif

 ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux GNU GNU/kFreeBSD))
--- a/unittests/Transforms/Utils/Local.cpp
+++ b/unittests/Transforms/Utils/Local.cpp
@ -58,3 +58,40 @@ TEST(Local, RecursivelyDeleteDeadPHINodes) {
  delete bb0;
  delete bb1;
 }
+
+TEST(Local, RemoveDuplicatePHINodes) {
+  LLVMContext &C(getGlobalContext());
+  IRBuilder<> B(C);
+
+  std::unique_ptr<Function> F(
+      Function::Create(FunctionType::get(B.getVoidTy(), false),
+                       GlobalValue::ExternalLinkage, "F"));
+  BasicBlock *Entry(BasicBlock::Create(C, "", F.get()));
+  BasicBlock *BB(BasicBlock::Create(C, "", F.get()));
+  BranchInst::Create(BB, Entry);
+
+  B.SetInsertPoint(BB);
+
+  AssertingVH<PHINode> P1 = B.CreatePHI(Type::getInt32Ty(C), 2);
+  P1->addIncoming(B.getInt32(42), Entry);
+
+  PHINode *P2 = B.CreatePHI(Type::getInt32Ty(C), 2);
+  P2->addIncoming(B.getInt32(42), Entry);
+
+  AssertingVH<PHINode> P3 = B.CreatePHI(Type::getInt32Ty(C), 2);
+  P3->addIncoming(B.getInt32(42), Entry);
+  P3->addIncoming(B.getInt32(23), BB);
+
+  PHINode *P4 = B.CreatePHI(Type::getInt32Ty(C), 2);
+  P4->addIncoming(B.getInt32(42), Entry);
+  P4->addIncoming(B.getInt32(23), BB);
+
+  P1->addIncoming(P3, BB);
+  P2->addIncoming(P4, BB);
+  BranchInst::Create(BB, BB);
+
+  // Verify that we can eliminate PHIs that become duplicates after chaning PHIs
+  // downstream.
+  EXPECT_TRUE(EliminateDuplicatePHINodes(BB));
+  EXPECT_EQ(3U, BB->size());
+}