Upgrade our copy of llvm/clang to 3.4.1 release. This release contains

mostly fixes, for the following upstream bugs: http://llvm.org/PR16365 http://llvm.org/PR17473 http://llvm.org/PR18000 http://llvm.org/PR18068 http://llvm.org/PR18102 http://llvm.org/PR18165 http://llvm.org/PR18260 http://llvm.org/PR18290 http://llvm.org/PR18316 http://llvm.org/PR18460 http://llvm.org/PR18473 http://llvm.org/PR18515 http://llvm.org/PR18526 http://llvm.org/PR18600 http://llvm.org/PR18762 http://llvm.org/PR18773 http://llvm.org/PR18860 http://llvm.org/PR18994 http://llvm.org/PR19007 http://llvm.org/PR19010 http://llvm.org/PR19033 http://llvm.org/PR19059 http://llvm.org/PR19144 http://llvm.org/PR19326 MFC after: 2 weeks
2014-05-12 18:45:56 +00:00 · 2014-05-12 18:45:56 +00:00 · 85d60e68ac
commit 85d60e68ac
parent 5c50ff3f99 f73d5f23a8 68bcb7db19
82 changed files with 1008 additions and 466 deletions
--- a/ObsoleteFiles.inc
+++ b/ObsoleteFiles.inc
@ -38,6 +38,43 @@
 #   xargs -n1 | sort | uniq -d;
 # done

+# 20140512: new clang import which bumps version from 3.4 to 3.4.1.
+OLD_FILES+=usr/include/clang/3.4/__wmmintrin_aes.h
+OLD_FILES+=usr/include/clang/3.4/__wmmintrin_pclmul.h
+OLD_FILES+=usr/include/clang/3.4/altivec.h
+OLD_FILES+=usr/include/clang/3.4/ammintrin.h
+OLD_FILES+=usr/include/clang/3.4/avx2intrin.h
+OLD_FILES+=usr/include/clang/3.4/avxintrin.h
+OLD_FILES+=usr/include/clang/3.4/bmi2intrin.h
+OLD_FILES+=usr/include/clang/3.4/bmiintrin.h
+OLD_FILES+=usr/include/clang/3.4/cpuid.h
+OLD_FILES+=usr/include/clang/3.4/emmintrin.h
+OLD_FILES+=usr/include/clang/3.4/f16cintrin.h
+OLD_FILES+=usr/include/clang/3.4/fma4intrin.h
+OLD_FILES+=usr/include/clang/3.4/fmaintrin.h
+OLD_FILES+=usr/include/clang/3.4/immintrin.h
+OLD_FILES+=usr/include/clang/3.4/lzcntintrin.h
+OLD_FILES+=usr/include/clang/3.4/mm3dnow.h
+OLD_FILES+=usr/include/clang/3.4/mm_malloc.h
+OLD_FILES+=usr/include/clang/3.4/mmintrin.h
+OLD_FILES+=usr/include/clang/3.4/module.map
+OLD_FILES+=usr/include/clang/3.4/nmmintrin.h
+OLD_FILES+=usr/include/clang/3.4/pmmintrin.h
+OLD_FILES+=usr/include/clang/3.4/popcntintrin.h
+OLD_FILES+=usr/include/clang/3.4/prfchwintrin.h
+OLD_FILES+=usr/include/clang/3.4/rdseedintrin.h
+OLD_FILES+=usr/include/clang/3.4/rtmintrin.h
+OLD_FILES+=usr/include/clang/3.4/shaintrin.h
+OLD_FILES+=usr/include/clang/3.4/smmintrin.h
+OLD_FILES+=usr/include/clang/3.4/tbmintrin.h
+OLD_FILES+=usr/include/clang/3.4/tmmintrin.h
+OLD_FILES+=usr/include/clang/3.4/wmmintrin.h
+OLD_FILES+=usr/include/clang/3.4/x86intrin.h
+OLD_FILES+=usr/include/clang/3.4/xmmintrin.h
+OLD_FILES+=usr/include/clang/3.4/xopintrin.h
+OLD_FILES+=usr/include/clang/3.4/arm_neon.h
+OLD_FILES+=usr/include/clang/3.4/module.map
+OLD_DIRS+=usr/include/clang/3.4
 # 20140505: Bogusly installing src.opts.mk
 OLD_FILES+=usr/share/mk/src.opts.mk
 # 20140505: Reject PR kern/187551
--- a/3
+++ b/3
@ -31,6 +31,9 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 11.x IS SLOW:
 	disable the most expensive debugging functionality run
 	"ln -s 'abort:false,junk:false' /etc/malloc.conf".)

+20140512:
+	Clang and llvm have been upgraded to 3.4.1 release.
+
 20140508:
 	We bogusly installed src.opts.mk in /usr/share/mk. This file should
 	be removed to avoid issues in the future (and has been added to
--- a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
@ -1758,68 +1758,68 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">,
      Intrinsic<[llvm_v2f64_ty],
        [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
      Intrinsic<[llvm_v4f64_ty],
        [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
      Intrinsic<[llvm_v2f64_ty],
        [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">,
      Intrinsic<[llvm_v4f64_ty],
        [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">,
      Intrinsic<[llvm_v4f32_ty],
        [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">,
      Intrinsic<[llvm_v8f32_ty],
        [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">,
      Intrinsic<[llvm_v4f32_ty],
        [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
      Intrinsic<[llvm_v4f32_ty],
        [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;

  def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">,
      Intrinsic<[llvm_v2i64_ty],
        [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">,
      Intrinsic<[llvm_v4i64_ty],
        [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">,
      Intrinsic<[llvm_v2i64_ty],
        [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">,
      Intrinsic<[llvm_v4i64_ty],
        [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">,
      Intrinsic<[llvm_v4i32_ty],
        [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
      Intrinsic<[llvm_v8i32_ty],
        [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">,
      Intrinsic<[llvm_v4i32_ty],
        [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
  def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">,
      Intrinsic<[llvm_v4i32_ty],
        [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
-        [IntrReadMem]>;
+        [IntrReadArgMem]>;
 }

 // Misc.
@ -2909,28 +2909,28 @@ let TargetPrefix = "x86" in {
  def int_x86_avx512_gather_dpd_mask_512  : GCCBuiltin<"__builtin_ia32_mask_gatherdpd512">,
          Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
                     llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty],
-                    [IntrReadMem]>;
+                    [IntrReadArgMem]>;
  def int_x86_avx512_gather_dps_mask_512  : GCCBuiltin<"__builtin_ia32_mask_gatherdps512">,
          Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i16_ty,
                     llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty],
-                    [IntrReadMem]>;
+                    [IntrReadArgMem]>;
  def int_x86_avx512_gather_qpd_mask_512  : GCCBuiltin<"__builtin_ia32_mask_gatherqpd512">,
          Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
                     llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
-                    [IntrReadMem]>;
+                    [IntrReadArgMem]>;
  def int_x86_avx512_gather_qps_mask_512  : GCCBuiltin<"__builtin_ia32_mask_gatherqps512">,
          Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i8_ty,
                     llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
-                    [IntrReadMem]>;
+                    [IntrReadArgMem]>;

  def int_x86_avx512_gather_dpd_512  : GCCBuiltin<"__builtin_ia32_gatherdpd512">,
          Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_ptr_ty,
                     llvm_i32_ty],
-                    [IntrReadMem]>;
+                    [IntrReadArgMem]>;
  def int_x86_avx512_gather_dps_512  : GCCBuiltin<"__builtin_ia32_gatherdps512">,
          Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
                     llvm_i32_ty],
-                    [IntrReadMem]>;
+                    [IntrReadArgMem]>;
  def int_x86_avx512_gather_qpd_512  : GCCBuiltin<"__builtin_ia32_gatherqpd512">,
          Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
                     llvm_i32_ty],
@ -2938,12 +2938,12 @@ let TargetPrefix = "x86" in {
  def int_x86_avx512_gather_qps_512  : GCCBuiltin<"__builtin_ia32_gatherqps512">,
          Intrinsic<[llvm_v8f32_ty], [llvm_v8i64_ty, llvm_ptr_ty, 
                     llvm_i32_ty],
-                    [IntrReadMem]>;
+                    [IntrReadArgMem]>;

  def int_x86_avx512_gather_dpq_mask_512  : GCCBuiltin<"__builtin_ia32_mask_gatherdpq512">,
          Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty,
                     llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty],
-                    [IntrReadMem]>;
+                    [IntrReadArgMem]>;
  def int_x86_avx512_gather_dpi_mask_512  : GCCBuiltin<"__builtin_ia32_mask_gatherdpi512">,
          Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i16_ty,
                     llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty],
@ -2955,7 +2955,7 @@ let TargetPrefix = "x86" in {
  def int_x86_avx512_gather_qpi_mask_512  : GCCBuiltin<"__builtin_ia32_mask_gatherqpi512">,
          Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i8_ty,
                     llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
-                    [IntrReadMem]>;
+                    [IntrReadArgMem]>;

  def int_x86_avx512_gather_dpq_512  : GCCBuiltin<"__builtin_ia32_gatherdpq512">,
          Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty, llvm_ptr_ty,
--- a/contrib/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCAsmInfo.h
@ -266,13 +266,16 @@ namespace llvm {
    /// global as being a weak undefined symbol.
    const char *WeakRefDirective;            // Defaults to NULL.

-    /// WeakDefDirective - This directive, if non-null, is used to declare a
-    /// global as being a weak defined symbol.
-    const char *WeakDefDirective;            // Defaults to NULL.
+    /// True if we have a directive to declare a global as being a weak
+    /// defined symbol.
+    bool HasWeakDefDirective;                // Defaults to false.

-    /// LinkOnceDirective - This directive, if non-null is used to declare a
-    /// global as being a weak defined symbol.  This is used on cygwin/mingw.
-    const char *LinkOnceDirective;           // Defaults to NULL.
+    /// True if we have a directive to declare a global as being a weak
+    /// defined symbol that can be hidden (unexported).
+    bool HasWeakDefCanBeHiddenDirective;     // Defaults to false.
+
+    /// True if we have a .linkonce directive.  This is used on cygwin/mingw.
+    bool HasLinkOnceDirective;               // Defaults to false.

    /// HiddenVisibilityAttr - This attribute, if not MCSA_Invalid, is used to
    /// declare a symbol as having hidden visibility.
@ -303,6 +306,10 @@ namespace llvm {
    /// uses relocations for references to other .debug_* sections.
    bool DwarfUsesRelocationsAcrossSections;

+    /// DwarfFDESymbolsUseAbsDiff - true if DWARF FDE symbol reference
+    /// relocations should be replaced by an absolute difference.
+    bool DwarfFDESymbolsUseAbsDiff;
+
    /// DwarfRegNumForCFI - True if dwarf register numbers are printed
    /// instead of symbolic register names in .cfi_* directives.
    bool DwarfRegNumForCFI;  // Defaults to false;
@ -497,8 +504,11 @@ namespace llvm {
    bool hasIdentDirective() const { return HasIdentDirective; }
    bool hasNoDeadStrip() const { return HasNoDeadStrip; }
    const char *getWeakRefDirective() const { return WeakRefDirective; }
-    const char *getWeakDefDirective() const { return WeakDefDirective; }
-    const char *getLinkOnceDirective() const { return LinkOnceDirective; }
+    bool hasWeakDefDirective() const { return HasWeakDefDirective; }
+    bool hasWeakDefCanBeHiddenDirective() const {
+      return HasWeakDefCanBeHiddenDirective;
+    }
+    bool hasLinkOnceDirective() const { return HasLinkOnceDirective; }

    MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr;}
    MCSymbolAttr getHiddenDeclarationVisibilityAttr() const {
@ -528,6 +538,9 @@ namespace llvm {
    bool doesDwarfUseRelocationsAcrossSections() const {
      return DwarfUsesRelocationsAcrossSections;
    }
+    bool doDwarfFDESymbolsUseAbsDiff() const {
+      return DwarfFDESymbolsUseAbsDiff;
+    }
    bool useDwarfRegNumForCFI() const {
      return DwarfRegNumForCFI;
    }
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@ -18,7 +18,10 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Constants.h"
@ -38,6 +41,12 @@
 #include <algorithm>
 using namespace llvm;

+/// Cutoff after which to stop analysing a set of phi nodes potentially involved
+/// in a cycle. Because we are analysing 'through' phi nodes we need to be
+/// careful with value equivalence. We use reachability to make sure a value
+/// cannot be involved in a cycle.
+const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
+
 //===----------------------------------------------------------------------===//
 // Useful predicates
 //===----------------------------------------------------------------------===//
@ -403,42 +412,6 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
  return V;
 }

-/// GetIndexDifference - Dest and Src are the variable indices from two
-/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
-/// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
-static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
-                               const SmallVectorImpl<VariableGEPIndex> &Src) {
-  if (Src.empty()) return;
-
-  for (unsigned i = 0, e = Src.size(); i != e; ++i) {
-    const Value *V = Src[i].V;
-    ExtensionKind Extension = Src[i].Extension;
-    int64_t Scale = Src[i].Scale;
-
-    // Find V in Dest.  This is N^2, but pointer indices almost never have more
-    // than a few variable indexes.
-    for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
-      if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
-
-      // If we found it, subtract off Scale V's from the entry in Dest.  If it
-      // goes to zero, remove the entry.
-      if (Dest[j].Scale != Scale)
-        Dest[j].Scale -= Scale;
-      else
-        Dest.erase(Dest.begin()+j);
-      Scale = 0;
-      break;
-    }
-
-    // If we didn't consume this entry, add it to the end of the Dest list.
-    if (Scale) {
-      VariableGEPIndex Entry = { V, Extension, -Scale };
-      Dest.push_back(Entry);
-    }
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // BasicAliasAnalysis Pass
 //===----------------------------------------------------------------------===//
@ -492,6 +465,7 @@ namespace {
      // SmallDenseMap if it ever grows larger.
      // FIXME: This should really be shrink_to_inline_capacity_and_clear().
      AliasCache.shrink_and_clear();
+      VisitedPhiBBs.clear();
      return Alias;
    }

@ -532,9 +506,39 @@ namespace {
    typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
    AliasCacheTy AliasCache;

+    /// \brief Track phi nodes we have visited. When interpret "Value" pointer
+    /// equality as value equality we need to make sure that the "Value" is not
+    /// part of a cycle. Otherwise, two uses could come from different
+    /// "iterations" of a cycle and see different values for the same "Value"
+    /// pointer.
+    /// The following example shows the problem:
+    ///   %p = phi(%alloca1, %addr2)
+    ///   %l = load %ptr
+    ///   %addr1 = gep, %alloca2, 0, %l
+    ///   %addr2 = gep  %alloca2, 0, (%l + 1)
+    ///      alias(%p, %addr1) -> MayAlias !
+    ///   store %l, ...
+    SmallPtrSet<const BasicBlock*, 8> VisitedPhiBBs;
+
    // Visited - Track instructions visited by pointsToConstantMemory.
    SmallPtrSet<const Value*, 16> Visited;

+    /// \brief Check whether two Values can be considered equivalent.
+    ///
+    /// In addition to pointer equivalence of \p V1 and \p V2 this checks
+    /// whether they can not be part of a cycle in the value graph by looking at
+    /// all visited phi nodes an making sure that the phis cannot reach the
+    /// value. We have to do this because we are looking through phi nodes (That
+    /// is we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB).
+    bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
+
+    /// \brief Dest and Src are the variable indices from two decomposed
+    /// GetElementPtr instructions GEP1 and GEP2 which have common base
+    /// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic
+    /// difference between the two pointers.
+    void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
+                            const SmallVectorImpl<VariableGEPIndex> &Src);
+
    // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
    // instruction against another.
    AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
@ -1005,7 +1009,15 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
        return NoAlias;
      }
    } else {
-      if (V1Size != UnknownSize) {
+      // We have the situation where:
+      // +                +
+      // | BaseOffset     |
+      // ---------------->|
+      // |-->V1Size       |-------> V2Size
+      // GEP1             V2
+      // We need to know that V2Size is not unknown, otherwise we might have
+      // stripped a gep with negative index ('gep <ptr>, -1, ...).
+      if (V1Size != UnknownSize && V2Size != UnknownSize) {
        if (-(uint64_t)GEP1BaseOffset < V1Size)
          return PartialAlias;
        return NoAlias;
@ -1094,6 +1106,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
                             const MDNode *PNTBAAInfo,
                             const Value *V2, uint64_t V2Size,
                             const MDNode *V2TBAAInfo) {
+  // Track phi nodes we have visited. We use this information when we determine
+  // value equivalence.
+  VisitedPhiBBs.insert(PN->getParent());
+
  // If the values are PHIs in the same block, we can do a more precise
  // as well as efficient check: just check for aliases between the values
  // on corresponding edges.
@ -1187,7 +1203,13 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
  V2 = V2->stripPointerCasts();

  // Are we checking for alias of the same value?
-  if (V1 == V2) return MustAlias;
+  // Because we look 'through' phi nodes we could look at "Value" pointers from
+  // different iterations. We must therefore make sure that this is not the
+  // case. The function isValueEqualInPotentialCycles ensures that this cannot
+  // happen by looking at the visited phi nodes and making sure they cannot
+  // reach the value.
+  if (isValueEqualInPotentialCycles(V1, V2))
+    return MustAlias;

  if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
    return NoAlias;  // Scalars cannot alias each other
@ -1307,3 +1329,71 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
                         Location(V2, V2Size, V2TBAAInfo));
  return AliasCache[Locs] = Result;
 }
+
+bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
+                                                       const Value *V2) {
+  if (V != V2)
+    return false;
+
+  const Instruction *Inst = dyn_cast<Instruction>(V);
+  if (!Inst)
+    return true;
+
+  if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
+    return false;
+
+  // Use dominance or loop info if available.
+  DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+  LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();
+
+  // Make sure that the visited phis cannot reach the Value. This ensures that
+  // the Values cannot come from different iterations of a potential cycle the
+  // phi nodes could be involved in.
+  for (SmallPtrSet<const BasicBlock *, 8>::iterator PI = VisitedPhiBBs.begin(),
+                                                    PE = VisitedPhiBBs.end();
+       PI != PE; ++PI)
+    if (isPotentiallyReachable((*PI)->begin(), Inst, DT, LI))
+      return false;
+
+  return true;
+}
+
+/// GetIndexDifference - Dest and Src are the variable indices from two
+/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
+/// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic
+/// difference between the two pointers.
+void BasicAliasAnalysis::GetIndexDifference(
+    SmallVectorImpl<VariableGEPIndex> &Dest,
+    const SmallVectorImpl<VariableGEPIndex> &Src) {
+  if (Src.empty())
+    return;
+
+  for (unsigned i = 0, e = Src.size(); i != e; ++i) {
+    const Value *V = Src[i].V;
+    ExtensionKind Extension = Src[i].Extension;
+    int64_t Scale = Src[i].Scale;
+
+    // Find V in Dest.  This is N^2, but pointer indices almost never have more
+    // than a few variable indexes.
+    for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
+      if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
+          Dest[j].Extension != Extension)
+        continue;
+
+      // If we found it, subtract off Scale V's from the entry in Dest.  If it
+      // goes to zero, remove the entry.
+      if (Dest[j].Scale != Scale)
+        Dest[j].Scale -= Scale;
+      else
+        Dest.erase(Dest.begin() + j);
+      Scale = 0;
+      break;
+    }
+
+    // If we didn't consume this entry, add it to the end of the Dest list.
+    if (Scale) {
+      VariableGEPIndex Entry = { V, Extension, -Scale };
+      Dest.push_back(Entry);
+    }
+  }
+}
--- a/contrib/llvm/lib/Analysis/IVUsers.cpp
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@ -187,15 +187,34 @@ bool IVUsers::AddUsersImpl(Instruction *I,

    if (AddUserToIVUsers) {
      // Okay, we found a user that we cannot reduce.
-      IVUses.push_back(new IVStrideUse(this, User, I));
-      IVStrideUse &NewUse = IVUses.back();
+      IVStrideUse &NewUse = AddUser(User, I);
      // Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
      // The regular return value here is discarded; instead of recording
      // it, we just recompute it when we need it.
+      const SCEV *OriginalISE = ISE;
      ISE = TransformForPostIncUse(NormalizeAutodetect,
                                   ISE, User, I,
                                   NewUse.PostIncLoops,
                                   *SE, *DT);
+
+      // PostIncNormalization effectively simplifies the expression under
+      // pre-increment assumptions. Those assumptions (no wrapping) might not
+      // hold for the post-inc value. Catch such cases by making sure the
+      // transformation is invertible.
+      if (OriginalISE != ISE) {
+        const SCEV *DenormalizedISE =
+          TransformForPostIncUse(Denormalize, ISE, User, I,
+              NewUse.PostIncLoops, *SE, *DT);
+
+        // If we normalized the expression, but denormalization doesn't give the
+        // original one, discard this user.
+        if (OriginalISE != DenormalizedISE) {
+          DEBUG(dbgs() << "   DISCARDING (NORMALIZATION ISN'T INVERTIBLE): "
+                       << *ISE << '\n');
+          IVUses.pop_back();
+          return false;
+        }
+      }
      DEBUG(if (SE->getSCEV(I) != ISE)
              dbgs() << "   NORMALIZED TO: " << *ISE << '\n');
    }
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@ -6218,7 +6218,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
  // LHS' type is checked for above.
  if (getTypeSizeInBits(LHS->getType()) >
      getTypeSizeInBits(FoundLHS->getType())) {
-    if (CmpInst::isSigned(Pred)) {
+    if (CmpInst::isSigned(FoundPred)) {
      FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
      FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
    } else {
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@ -223,13 +223,14 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
  case GlobalValue::WeakAnyLinkage:
  case GlobalValue::WeakODRLinkage:
  case GlobalValue::LinkerPrivateWeakLinkage:
-    if (MAI->getWeakDefDirective() != 0) {
+    if (MAI->hasWeakDefDirective()) {
      // .globl _foo
      OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);

      bool CanBeHidden = false;

-      if (Linkage == GlobalValue::LinkOnceODRLinkage) {
+      if (Linkage == GlobalValue::LinkOnceODRLinkage &&
+          MAI->hasWeakDefCanBeHiddenDirective()) {
        if (GV->hasUnnamedAddr()) {
          CanBeHidden = true;
        } else {
@ -244,7 +245,7 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
        OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
      else
        OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
-    } else if (MAI->getLinkOnceDirective() != 0) {
+    } else if (MAI->hasLinkOnceDirective()) {
      // .globl _foo
      OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
      //NOTE: linkonce is handled by the section the symbol was assigned to.
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -8547,7 +8547,10 @@ struct MemOpLink {
 // base ptr.
 struct ConsecutiveMemoryChainSorter {
  bool operator()(MemOpLink LHS, MemOpLink RHS) {
-    return LHS.OffsetFromBase < RHS.OffsetFromBase;
+    return
+        LHS.OffsetFromBase < RHS.OffsetFromBase ||
+        (LHS.OffsetFromBase == RHS.OffsetFromBase &&
+         LHS.SequenceNum > RHS.SequenceNum);
  }
 };

--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@ -210,6 +210,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
  case ISD::SRL:
  case ISD::ROTL:
  case ISD::ROTR:
+  case ISD::BSWAP:
  case ISD::CTLZ:
  case ISD::CTTZ:
  case ISD::CTLZ_ZERO_UNDEF:
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@ -219,8 +219,11 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
  bool Cluster = false;
  SDNode *Base = Node;
+  // This algorithm requires a reasonably low use count before finding a match
+  // to avoid uselessly blowing up compile time in large blocks.
+  unsigned UseCount = 0;
  for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
-       I != E; ++I) {
+       I != E && UseCount < 100; ++I, ++UseCount) {
    SDNode *User = *I;
    if (User == Node || !Visited.insert(User))
      continue;
@ -237,6 +240,8 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
    if (Offset2 < Offset1)
      Base = User;
    Cluster = true;
+    // Reset UseCount to allow more matches.
+    UseCount = 0;
  }

  if (!Cluster)
--- a/contrib/llvm/lib/MC/MCAsmInfo.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp
@ -76,8 +76,9 @@ MCAsmInfo::MCAsmInfo() {
  HasIdentDirective = false;
  HasNoDeadStrip = false;
  WeakRefDirective = 0;
-  WeakDefDirective = 0;
-  LinkOnceDirective = 0;
+  HasWeakDefDirective = false;
+  HasWeakDefCanBeHiddenDirective = false;
+  HasLinkOnceDirective = false;
  HiddenVisibilityAttr = MCSA_Hidden;
  HiddenDeclarationVisibilityAttr = MCSA_Hidden;
  ProtectedVisibilityAttr = MCSA_Protected;
@ -85,6 +86,7 @@ MCAsmInfo::MCAsmInfo() {
  SupportsDebugInformation = false;
  ExceptionsType = ExceptionHandling::None;
  DwarfUsesRelocationsAcrossSections = true;
+  DwarfFDESymbolsUseAbsDiff = false;
  DwarfRegNumForCFI = false;
  HasMicrosoftFastStdCallMangling = false;
  NeedsDwarfSectionOffsetDirective = false;
--- a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
@ -27,7 +27,7 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
  HasSingleParameterDotFile = false;
  PrivateGlobalPrefix = "L";  // Prefix for private global symbols
  WeakRefDirective = "\t.weak\t";
-  LinkOnceDirective = "\t.linkonce discard\n";
+  HasLinkOnceDirective = true;

  // Doesn't support visibility:
  HiddenVisibilityAttr = HiddenDeclarationVisibilityAttr = MCSA_Invalid;
--- a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
@ -36,7 +36,8 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
  InlineAsmEnd = " InlineAsm End";

  // Directives:
-  WeakDefDirective = "\t.weak_definition ";
+  HasWeakDefDirective = true;
+  HasWeakDefCanBeHiddenDirective = true;
  WeakRefDirective = "\t.weak_reference ";
  ZeroDirective = "\t.space\t";  // ".space N" emits N zeros.
  HasMachoZeroFillDirective = true;  // Uses .zerofill
--- a/contrib/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@ -839,8 +839,9 @@ static unsigned getSizeForEncoding(MCStreamer &streamer,
  }
 }

-static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
-                       unsigned symbolEncoding, const char *comment = 0) {
+static void EmitFDESymbol(MCStreamer &streamer, const MCSymbol &symbol,
+                       unsigned symbolEncoding, bool isEH,
+                       const char *comment = 0) {
  MCContext &context = streamer.getContext();
  const MCAsmInfo *asmInfo = context.getAsmInfo();
  const MCExpr *v = asmInfo->getExprForFDESymbol(&symbol,
@ -848,7 +849,10 @@ static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
                                                 streamer);
  unsigned size = getSizeForEncoding(streamer, symbolEncoding);
  if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment);
-  streamer.EmitAbsValue(v, size);
+  if (asmInfo->doDwarfFDESymbolsUseAbsDiff() && isEH)
+    streamer.EmitAbsValue(v, size);
+  else
+    streamer.EmitValue(v, size);
 }

 static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
@ -1347,7 +1351,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
  unsigned PCEncoding = IsEH ? MOFI->getFDEEncoding(UsingCFI)
                             : (unsigned)dwarf::DW_EH_PE_absptr;
  unsigned PCSize = getSizeForEncoding(streamer, PCEncoding);
-  EmitSymbol(streamer, *frame.Begin, PCEncoding, "FDE initial location");
+  EmitFDESymbol(streamer, *frame.Begin, PCEncoding, IsEH, "FDE initial location");

  // PC Range
  const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
@ -1367,8 +1371,8 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,

    // Augmentation Data
    if (frame.Lsda)
-      EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding,
-                 "Language Specific Data Area");
+      EmitFDESymbol(streamer, *frame.Lsda, frame.LsdaEncoding, true,
+                    "Language Specific Data Area");
  }

  // Call Frame Instructions
--- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
@ -4297,6 +4297,10 @@ bool AsmParser::parseMSInlineAsm(
      break;
    }
    case AOK_DotOperator:
+      // Insert the dot if the user omitted it.
+      OS.flush();
+      if (AsmStringIR.at(AsmStringIR.size() - 1) != '.')
+        OS << '.';
      OS << (*I).Val;
      break;
    }
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@ -31,12 +31,8 @@ using namespace llvm;

 static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
  const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
-
-  if (Subtarget->isTargetLinux())
-    return new AArch64LinuxTargetObjectFile();
-  if (Subtarget->isTargetELF())
-    return new TargetLoweringObjectFileELF();
-  llvm_unreachable("unknown subtarget type");
+  assert (Subtarget->isTargetELF() && "unknown subtarget type");
+  return new AArch64ElfTargetObjectFile();
 }

 AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
@ -2782,7 +2778,7 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
 SDValue
 AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
-  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();

  // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
  // rather than just 8.
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@ -2587,6 +2587,7 @@ class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
                        pat, itin> {
  let mayStore = 1;
  let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+  let Constraints = "@earlyclobber $Rs";
 }

 multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@ -22,3 +22,10 @@ AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx,
  TargetLoweringObjectFileELF::Initialize(Ctx, TM);
  InitializeELF(TM.Options.UseInitArray);
 }
+
+void
+AArch64ElfTargetObjectFile::Initialize(MCContext &Ctx,
+                                       const TargetMachine &TM) {
+  TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+  InitializeELF(TM.Options.UseInitArray);
+}
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
@ -20,8 +20,12 @@

 namespace llvm {

-  /// AArch64LinuxTargetObjectFile - This implementation is used for linux
-  /// AArch64.
+  /// AArch64ElfTargetObjectFile - This implementation is used for ELF
+  /// AArch64 targets.
+  class AArch64ElfTargetObjectFile : public TargetLoweringObjectFileELF {
+    virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+  };
+
  class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
    virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
  };
--- a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@ -418,7 +418,8 @@ SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
    if (!MO.isReg() || !MO.isUse())
      continue;
    if (!usesRegClass(MO, &ARM::DPRRegClass) &&
-        !usesRegClass(MO, &ARM::QPRRegClass))
+        !usesRegClass(MO, &ARM::QPRRegClass) &&
+        !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR
      continue;

    Defs.push_back(MO.getReg());
@ -538,7 +539,10 @@ A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
  InsertPt++;
  unsigned Out;

-  if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) {
+  // DPair has the same length as QPR and also has two DPRs as subreg.
+  // Treat DPair as QPR.
+  if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) ||
+      MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {
    unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
                                         ARM::dsub_0, &ARM::DPRRegClass);
    unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
@ -571,7 +575,9 @@ A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
      default: llvm_unreachable("Unknown preferred lane!");
    }

-    bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass);
+    // Treat DPair as QPR
+    bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) ||
+                   usesRegClass(MI->getOperand(0), &ARM::DPairRegClass);

    Out = createImplicitDef(MBB, InsertPt, DL);
    Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@ -3684,6 +3684,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    case ARM::VLD3d16Pseudo:
    case ARM::VLD3d32Pseudo:
    case ARM::VLD1d64TPseudo:
+    case ARM::VLD1d64TPseudoWB_fixed:
    case ARM::VLD3d8Pseudo_UPD:
    case ARM::VLD3d16Pseudo_UPD:
    case ARM::VLD3d32Pseudo_UPD:
@ -3700,6 +3701,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    case ARM::VLD4d16Pseudo:
    case ARM::VLD4d32Pseudo:
    case ARM::VLD1d64QPseudo:
+    case ARM::VLD1d64QPseudoWB_fixed:
    case ARM::VLD4d8Pseudo_UPD:
    case ARM::VLD4d16Pseudo_UPD:
    case ARM::VLD4d32Pseudo_UPD:
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@ -136,7 +136,9 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
 { ARM::VLD1LNq8Pseudo_UPD,  ARM::VLD1LNd8_UPD, true, true, true,  EvenDblSpc, 1, 8 ,true},

 { ARM::VLD1d64QPseudo,      ARM::VLD1d64Q,     true,  false, false, SingleSpc,  4, 1 ,false},
+{ ARM::VLD1d64QPseudoWB_fixed,  ARM::VLD1d64Qwb_fixed,   true,  true, false, SingleSpc,  4, 1 ,false},
 { ARM::VLD1d64TPseudo,      ARM::VLD1d64T,     true,  false, false, SingleSpc,  3, 1 ,false},
+{ ARM::VLD1d64TPseudoWB_fixed,  ARM::VLD1d64Twb_fixed,   true,  true, false, SingleSpc,  3, 1 ,false},

 { ARM::VLD2LNd16Pseudo,     ARM::VLD2LNd16,     true, false, false, SingleSpc,  2, 4 ,true},
 { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true,  SingleSpc,  2, 4 ,true},
@ -1071,6 +1073,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
    case ARM::VLD3d16Pseudo:
    case ARM::VLD3d32Pseudo:
    case ARM::VLD1d64TPseudo:
+    case ARM::VLD1d64TPseudoWB_fixed:
    case ARM::VLD3d8Pseudo_UPD:
    case ARM::VLD3d16Pseudo_UPD:
    case ARM::VLD3d32Pseudo_UPD:
@ -1087,6 +1090,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
    case ARM::VLD4d16Pseudo:
    case ARM::VLD4d32Pseudo:
    case ARM::VLD1d64QPseudo:
+    case ARM::VLD1d64QPseudoWB_fixed:
    case ARM::VLD4d8Pseudo_UPD:
    case ARM::VLD4d16Pseudo_UPD:
    case ARM::VLD4d32Pseudo_UPD:
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@ -1673,9 +1673,61 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
  return CurDAG->getTargetConstant(Alignment, MVT::i32);
 }

+static bool isVLDfixed(unsigned Opc)
+{
+  switch (Opc) {
+  default: return false;
+  case ARM::VLD1d8wb_fixed : return true;
+  case ARM::VLD1d16wb_fixed : return true;
+  case ARM::VLD1d64Qwb_fixed : return true;
+  case ARM::VLD1d32wb_fixed : return true;
+  case ARM::VLD1d64wb_fixed : return true;
+  case ARM::VLD1d64TPseudoWB_fixed : return true;
+  case ARM::VLD1d64QPseudoWB_fixed : return true;
+  case ARM::VLD1q8wb_fixed : return true;
+  case ARM::VLD1q16wb_fixed : return true;
+  case ARM::VLD1q32wb_fixed : return true;
+  case ARM::VLD1q64wb_fixed : return true;
+  case ARM::VLD2d8wb_fixed : return true;
+  case ARM::VLD2d16wb_fixed : return true;
+  case ARM::VLD2d32wb_fixed : return true;
+  case ARM::VLD2q8PseudoWB_fixed : return true;
+  case ARM::VLD2q16PseudoWB_fixed : return true;
+  case ARM::VLD2q32PseudoWB_fixed : return true;
+  case ARM::VLD2DUPd8wb_fixed : return true;
+  case ARM::VLD2DUPd16wb_fixed : return true;
+  case ARM::VLD2DUPd32wb_fixed : return true;
+  }
+}
+
+static bool isVSTfixed(unsigned Opc)
+{
+  switch (Opc) {
+  default: return false;
+  case ARM::VST1d8wb_fixed : return true;
+  case ARM::VST1d16wb_fixed : return true;
+  case ARM::VST1d32wb_fixed : return true;
+  case ARM::VST1d64wb_fixed : return true;
+  case ARM::VST1q8wb_fixed : return true; 
+  case ARM::VST1q16wb_fixed : return true; 
+  case ARM::VST1q32wb_fixed : return true; 
+  case ARM::VST1q64wb_fixed : return true; 
+  case ARM::VST1d64TPseudoWB_fixed : return true;
+  case ARM::VST1d64QPseudoWB_fixed : return true;
+  case ARM::VST2d8wb_fixed : return true;
+  case ARM::VST2d16wb_fixed : return true;
+  case ARM::VST2d32wb_fixed : return true;
+  case ARM::VST2q8PseudoWB_fixed : return true;
+  case ARM::VST2q16PseudoWB_fixed : return true;
+  case ARM::VST2q32PseudoWB_fixed : return true;
+  }
+}
+
 // Get the register stride update opcode of a VLD/VST instruction that
 // is otherwise equivalent to the given fixed stride updating instruction.
 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
+  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
+    && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
@ -1686,6 +1738,10 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
+  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
+  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
+  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
+  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
@ -1785,11 +1841,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
      // case entirely when the rest are updated to that form, too.
-      if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
+      if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
-      // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
+      // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
      // check for that explicitly too. Horribly hacky, but temporary.
-      if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) ||
+      if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
          !isa<ConstantSDNode>(Inc.getNode()))
        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
    }
@ -1937,11 +1993,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
      // case entirely when the rest are updated to that form, too.
      if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
-      // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
+      // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
      // check for that explicitly too. Horribly hacky, but temporary.
-      if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
-          !isa<ConstantSDNode>(Inc.getNode()))
-        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+      if  (!isa<ConstantSDNode>(Inc.getNode()))
+        Ops.push_back(Inc);
+      else if (NumVecs > 2 && !isVSTfixed(Opc))
+        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
@ -2834,7 +2891,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
-                                         ARM::VLD1q64wb_fixed};
+                                         ARM::VLD1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
@ -2848,7 +2905,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
    static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
                                         ARM::VLD4d16Pseudo_UPD,
                                         ARM::VLD4d32Pseudo_UPD,
-                                         ARM::VLD1q64wb_fixed};
+                                         ARM::VLD1d64QPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                          ARM::VLD4q16Pseudo_UPD,
                                          ARM::VLD4q32Pseudo_UPD };
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@ -730,6 +730,8 @@ defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
 defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;

 def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;

 // ...with 4 registers
 class VLD1D4<bits<4> op7_4, string Dt>
@ -769,6 +771,8 @@ defm VLD1d32Qwb  : VLD1D4WB<{1,0,?,?}, "32">;
 defm VLD1d64Qwb  : VLD1D4WB<{1,1,?,?}, "64">;

 def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;

 //   VLD2     : Vector Load (multiple 2-element structures)
 class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
@ -1671,7 +1675,7 @@ defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
 defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;

 def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>;
 def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;

 // ...with 4 registers
@ -1714,7 +1718,7 @@ defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
 defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;

 def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>;
 def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;

 //   VST2     : Vector Store (multiple 2-element structures)
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@ -12,10 +12,14 @@
 //===----------------------------------------------------------------------===//

 #include "PPCMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
 using namespace llvm;

 void PPCMCAsmInfoDarwin::anchor() { }

+/// This version of the constructor is here to maintain ABI compatibility with
+/// LLVM 3.4.0
 PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
  if (is64Bit) {
    PointerSize = CalleeSaveStackSlotSize = 8;
@ -32,6 +36,28 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
  SupportsDebugInformation= true; // Debug information.
 }

+PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
+  if (is64Bit) {
+    PointerSize = CalleeSaveStackSlotSize = 8;
+  }
+  IsLittleEndian = false;
+
+  CommentString = ";";
+  ExceptionsType = ExceptionHandling::DwarfCFI;
+
+  if (!is64Bit)
+    Data64bitsDirective = 0;      // We can't emit a 64-bit unit in PPC32 mode.
+
+  AssemblerDialect = 1;           // New-Style mnemonics.
+  SupportsDebugInformation= true; // Debug information.
+
+  // old assembler lacks some directives
+  // FIXME: this should really be a check on the assembler characteristics
+  // rather than OS version
+  if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
+    HasWeakDefCanBeHiddenDirective = false;
+}
+
 void PPCLinuxMCAsmInfo::anchor() { }

 PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@ -18,11 +18,15 @@
 #include "llvm/MC/MCAsmInfoELF.h"

 namespace llvm {
+class Triple;

  class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
    virtual void anchor();
  public:
+    /// This version of the constructor is here to maintain ABI compatibility
+    /// with LLVM 3.4.0.
    explicit PPCMCAsmInfoDarwin(bool is64Bit);
+    explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
  };

  class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@ -72,7 +72,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {

  MCAsmInfo *MAI;
  if (TheTriple.isOSDarwin())
-    MAI = new PPCMCAsmInfoDarwin(isPPC64);
+    MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
  else
    MAI = new PPCLinuxMCAsmInfo(isPPC64);

--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@ -701,13 +701,6 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
      return;
    }
    break;
-  case PPC::SYNC:
-    // In Book E sync is called msync, handle this special case here...
-    if (Subtarget.isBookE()) {
-      OutStreamer.EmitRawText(StringRef("\tmsync"));
-      return;
-    }
-    break;
  case PPC::LD:
  case PPC::STD:
  case PPC::LWA_32:
--- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@ -186,6 +186,13 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
  return MadeChange;
 }

+static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
+  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+    return ITy->getBitWidth() > (Is32Bit ? 32 : 64);
+
+  return false;
+}
+
 bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
@ -352,13 +359,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
      CastInst *CI = cast<CastInst>(J);
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
-          (TT.isArch32Bit() &&
-           (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
-            CI->getDestTy()->getScalarType()->isIntegerTy(64))
-          ))
+          isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
+          isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
        return true;
-    } else if (TT.isArch32Bit() &&
-               J->getType()->getScalarType()->isIntegerTy(64) &&
+    } else if (isLargeIntegerTy(TT.isArch32Bit(),
+                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@ -892,11 +892,13 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
-    Addr.Offset = 4;
-    if (!IsSigned)
+    if (!IsSigned) {
      LoadOpc = PPC::LFIWZX;
-    else if (PPCSubTarget.hasLFIWAX())
+      Addr.Offset = 4;
+    } else if (PPCSubTarget.hasLFIWAX()) {
      LoadOpc = PPC::LFIWAX;
+      Addr.Offset = 4;
+    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@ -261,11 +261,11 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
    DebugLoc dl;

    if (PPCLowering.getPointerTy() == MVT::i32) {
-      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
    } else {
-      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RCRegClass);
+      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
    }
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@ -2333,7 +2333,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
-                                      MachinePointerInfo(FuncArg, CurArgOffset),
+                                      MachinePointerInfo(FuncArg),
                                      ObjType, false, false, 0);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
@ -2345,7 +2345,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
            int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                                 MachinePointerInfo(FuncArg, ArgOffset),
+                                 MachinePointerInfo(FuncArg),
                                 false, false, 0);
          }

@ -2369,7 +2369,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                                       MachinePointerInfo(FuncArg, ArgOffset),
+                                       MachinePointerInfo(FuncArg, j),
                                       false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
@ -2665,8 +2665,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
-                                            MachinePointerInfo(FuncArg,
-                                              CurArgOffset),
+                                            MachinePointerInfo(FuncArg),
                                            ObjType, false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
@ -2690,7 +2689,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                                       MachinePointerInfo(FuncArg, ArgOffset),
+                                       MachinePointerInfo(FuncArg, j),
                                       false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@ -570,12 +570,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
  // update isStoreToStackSlot.

  DebugLoc DL;
-  if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
+  if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+      PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
                                       .addReg(SrcReg,
                                               getKillRegState(isKill)),
                                       FrameIdx));
-  } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
+  } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+             PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
                                       .addReg(SrcReg,
                                               getKillRegState(isKill)),
@ -695,10 +697,12 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
  // Note: If additional load instructions are added here,
  // update isLoadFromStackSlot.

-  if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
+  if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+      PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
                                               DestReg), FrameIdx));
-  } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
+  } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+             PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
                                       FrameIdx));
  } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@ -580,6 +580,7 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
 def In32BitMode  : Predicate<"!PPCSubTarget.isPPC64()">;
 def In64BitMode  : Predicate<"PPCSubTarget.isPPC64()">;
 def IsBookE  : Predicate<"PPCSubTarget.isBookE()">;
+def IsNotBookE  : Predicate<"!PPCSubTarget.isBookE()">;

 //===----------------------------------------------------------------------===//
 // PowerPC Multiclass Definitions.
@ -1541,8 +1542,17 @@ def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
                   "stmw $rS, $dst", LdStLMW, []>;

 def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
-                        "sync $L", LdStSync, []>;
-def : Pat<(int_ppc_sync), (SYNC 0)>;
+                        "sync $L", LdStSync, []>, Requires<[IsNotBookE]>;
+
+let isCodeGenOnly = 1 in {
+  def MSYNC : XForm_24_sync<31, 598, (outs), (ins),
+                           "msync", LdStSync, []>, Requires<[IsBookE]> {
+    let L = 0;
+  }
+}
+
+def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
+def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;

 //===----------------------------------------------------------------------===//
 // PPC32 Arithmetic Instructions.
@ -2284,7 +2294,8 @@ def : Pat<(f64 (extloadf32 xaddr:$src)),
 def : Pat<(f64 (fextend f32:$src)),
          (COPY_TO_REGCLASS $src, F8RC)>;

-def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>;
+def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>;
+def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>;

 // Additional FNMSUB patterns: -a*c + b == -(a*c - b)
 def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
@ -2373,10 +2384,10 @@ class PPCAsmPseudo<string asm, dag iops>

 def : InstAlias<"sc", (SC 0)>;

-def : InstAlias<"sync", (SYNC 0)>;
-def : InstAlias<"msync", (SYNC 0)>;
-def : InstAlias<"lwsync", (SYNC 1)>;
-def : InstAlias<"ptesync", (SYNC 2)>;
+def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>;
+def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>;
+def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>;
+def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>;

 def : InstAlias<"wait", (WAIT 0)>;
 def : InstAlias<"waitrsv", (WAIT 1)>;
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@ -144,6 +144,13 @@ def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
 def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
 }

+// The full condition-code register. This is not modeled fully, but defined
+// here primarily, for compatibility with gcc, to allow the inline asm "cc"
+// clobber specification to work.
+def CC : PPCReg<"cc">, DwarfRegAlias<CR0> {
+  let Aliases = [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7];
+}
+
 // Link register
 def LR  : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
 //let Aliases = [LR] in
@ -234,3 +241,8 @@ def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
 def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
  let CopyCost = -1;
 }
+
+def CCRC : RegisterClass<"PPC", [i32], 32, (add CC)> {
+  let isAllocatable = 0;
+}
+
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@ -126,22 +126,6 @@ public:
  /// selection.
  const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }

-  /// getDataLayoutString - Return the pointer size and type alignment
-  /// properties of this subtarget.
-  const char *getDataLayoutString() const {
-    // Note, the alignment values for f64 and i64 on ppc64 in Darwin
-    // documentation are wrong; these are correct (i.e. "what gcc does").
-    if (isPPC64() && isSVR4ABI()) {
-      if (TargetTriple.getOS() == llvm::Triple::FreeBSD)
-        return "E-p:64:64-f64:64:64-i64:64:64-v128:128:128-n32:64";
-      else
-        return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64";
-    }
-
-    return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
-                     : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
-  }
-
  /// \brief Reset the features for the PowerPC target.
  virtual void resetSubtargetFeatures(const MachineFunction *MF);
 private:
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@ -33,6 +33,43 @@ extern "C" void LLVMInitializePowerPCTarget() {
  RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
 }

+/// Return the datalayout string of a subtarget.
+static std::string getDataLayoutString(const PPCSubtarget &ST) {
+  const Triple &T = ST.getTargetTriple();
+
+  // PPC is big endian
+  std::string Ret = "E";
+
+  // PPC64 has 64 bit pointers, PPC32 has 32 bit pointers.
+  if (ST.isPPC64())
+    Ret += "-p:64:64";
+  else
+    Ret += "-p:32:32";
+
+  // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+  // documentation are wrong; these are correct (i.e. "what gcc does").
+  if (ST.isPPC64() || ST.isSVR4ABI())
+    Ret += "-f64:64:64-i64:64:64";
+  else
+    Ret += "-f64:32:64";
+
+  // Set support for 128 floats depending on the ABI.
+  if (!ST.isPPC64() && ST.isSVR4ABI())
+    Ret += "-f128:64:128";
+
+  // Some ABIs support 128 bit vectors.
+  if (ST.isPPC64() && ST.isSVR4ABI())
+    Ret += "-v128:128:128";
+
+  // PPC64 has 32 and 64 bit register, PPC32 has only 32 bit ones.
+  if (ST.isPPC64())
+    Ret += "-n32:64";
+  else
+    Ret += "-n32";
+
+  return Ret;
+}
+
 PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
                                   StringRef CPU, StringRef FS,
                                   const TargetOptions &Options,
@ -41,7 +78,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
                                   bool is64Bit)
  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
    Subtarget(TT, CPU, FS, is64Bit),
-    DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
+    DL(getDataLayoutString(Subtarget)), InstrInfo(*this),
    FrameLowering(Subtarget), JITInfo(*this, is64Bit),
    TLInfo(*this), TSInfo(*this),
    InstrItins(Subtarget.getInstrItineraryData()) {
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@ -133,6 +133,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);

+  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+
  setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
  setOperationAction(ISD::FNEG, MVT::v4f32, Expand);

--- a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
@ -388,6 +388,11 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <

 // Bitfield extract patterns

+/*
+
+XXX: The BFE pattern is not working correctly because the XForm is not being
+applied.
+
 def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
 def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
                            SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;
@ -397,6 +402,8 @@ class BFEPattern <Instruction BFE> : Pat <
  (BFE $x, $y, $z)
 >;

+*/
+
 // rotr pattern
 class ROTRPattern <Instruction BIT_ALIGN> : Pat <
  (rotr i32:$src0, i32:$src1),
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@ -13,7 +13,6 @@
 using namespace llvm;
 AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
  HasSingleParameterDotFile = false;
-  WeakDefDirective = 0;
  //===------------------------------------------------------------------===//
  HasSubsectionsViaSymbols = true;
  HasMachoZeroFillDirective = false;
@ -58,7 +57,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
  HasDotTypeDotSizeDirective = false;
  HasNoDeadStrip = true;
  WeakRefDirective = ".weakref\t";
-  LinkOnceDirective = 0;
  //===--- Dwarf Emission Directives -----------------------------------===//
  HasLEB128 = true;
  SupportsDebugInformation = true;
--- a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
@ -356,6 +356,7 @@ public:
          DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
+          LastAlu.back() = 0;
          continue;
        }

--- a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
@ -716,7 +716,13 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
    return false;
  }

-  // Get the last instruction in the block.
+  // Remove successive JUMP
+  while (I != MBB.begin() && llvm::prior(I)->getOpcode() == AMDGPU::JUMP) {
+      MachineBasicBlock::iterator PriorI = llvm::prior(I);
+      if (AllowModify)
+        I->removeFromParent();
+      I = PriorI;
+  }
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
--- a/contrib/llvm/lib/Target/R600/R600Instructions.td
+++ b/contrib/llvm/lib/Target/R600/R600Instructions.td
@ -1516,7 +1516,9 @@ let Predicates = [isEGorCayman] in {
                                               i32:$src2))],
    VecALU
  >;
-  def : BFEPattern <BFE_UINT_eg>;
+// XXX: This pattern is broken, disabling for now.  See comment in
+// AMDGPUInstructions.td for more info.
+//  def : BFEPattern <BFE_UINT_eg>;

  def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
  defm : BFIPatterns <BFI_INT_eg>;
@ -1636,7 +1638,6 @@ class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
  let src2 = 0;
  let src2_rel = 0;

-  let Defs = [OQAP];
  let usesCustomInserter = 1;
  let LDS_1A = 1;
  let DisableEncoding = "$dst";
@ -1672,7 +1673,6 @@ class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
  let BaseOp = name;
  let usesCustomInserter = 1;
  let DisableEncoding = "$dst";
-  let Defs = [OQAP];
 }

 class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
--- a/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
@ -187,7 +187,7 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
      DstRC == &AMDGPU::M0RegRegClass)
    return false;

-  SrcRC = inferRegClassFromDef(TRI, MRI, SrcReg, SrcSubReg);
+  SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
  return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
 }

--- a/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
+++ b/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
@ -314,6 +314,12 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {

  Counters Result = ZeroCounts;

+  // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
+  // but we also want to wait for any other outstanding transfers before
+  // signalling other hardware blocks
+  if (MI.getOpcode() == AMDGPU::S_SENDMSG)
+    return LastIssued;
+
  // For each register affected by this
  // instruction increase the result sequence
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.td
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
@ -290,10 +290,10 @@ multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
  : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;

 multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
-                     string revOp = opName> {
+                     RegisterClass src0_rc, string revOp = opName> {

  def _e32 : VOP2 <
-    op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1),
+    op, (outs VReg_32:$dst), (ins src0_rc:$src0, VReg_32:$src1),
    opName#"_e32 $dst, $src0, $src1", pattern
  >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;

@ -425,26 +425,48 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU

 multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {

-  let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */,
-                                          mayLoad = 1 in {
+  let lds = 0, mayLoad = 1 in {

-  let offen = 1, idxen = 0, addr64 = 0, offset = 0 in {
-    def _OFFEN  : MUBUF <op, (outs regClass:$vdata),
-                         (ins SReg_128:$srsrc, VReg_32:$vaddr),
-                         asm#" $vdata, $srsrc + $vaddr", []>;
-  }
+    let addr64 = 0 in {

-  let offen = 0, idxen = 1, addr64 = 0 in {
-    def _IDXEN  : MUBUF <op, (outs regClass:$vdata),
-                         (ins SReg_128:$srsrc, VReg_32:$vaddr, i16imm:$offset),
-                         asm#" $vdata, $srsrc[$vaddr] + $offset", []>;
-  }
+      let offen = 0, idxen = 0 in {
+        def _OFFSET : MUBUF <op, (outs regClass:$vdata),
+                             (ins SReg_128:$srsrc, VReg_32:$vaddr,
+                             i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
+                             i1imm:$slc, i1imm:$tfe),
+                             asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+      }

-  let offen = 0, idxen = 0, addr64 = 1 in {
-    def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
-                         (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
-                         asm#" $vdata, $srsrc + $vaddr + $offset", []>;
-  }
+      let offen = 1, idxen = 0, offset = 0 in {
+        def _OFFEN  : MUBUF <op, (outs regClass:$vdata),
+                             (ins SReg_128:$srsrc, VReg_32:$vaddr,
+                             SSrc_32:$soffset, i1imm:$glc, i1imm:$slc,
+                             i1imm:$tfe),
+                             asm#" $vdata, $srsrc + $vaddr + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+      }
+
+      let offen = 0, idxen = 1 in {
+        def _IDXEN  : MUBUF <op, (outs regClass:$vdata),
+                             (ins SReg_128:$srsrc, VReg_32:$vaddr,
+                             i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
+                             i1imm:$slc, i1imm:$tfe),
+                             asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+      }
+
+      let offen = 1, idxen = 1 in {
+        def _BOTHEN : MUBUF <op, (outs regClass:$vdata),
+                             (ins SReg_128:$srsrc, VReg_64:$vaddr,
+                             SSrc_32:$soffset, i1imm:$glc,
+                             i1imm:$slc, i1imm:$tfe),
+                             asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+      }
+    }
+
+    let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
+      def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
+                           (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
+                           asm#" $vdata, $srsrc + $vaddr + $offset", []>;
+    }
  }
 }

--- a/contrib/llvm/lib/Target/R600/SIInstructions.td
+++ b/contrib/llvm/lib/Target/R600/SIInstructions.td
@ -22,6 +22,8 @@ def InterpSlot : Operand<i32> {
  let PrintMethod = "printInterpSlot";
 }

+def SendMsgImm : Operand<i32>;
+
 def isSI : Predicate<"Subtarget.getGeneration() "
                      ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;

@ -826,17 +828,25 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
 def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
  []
 >;
-} // End hasSideEffects
 //def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
 //def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
 //def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
-//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
+
+let Uses = [EXEC] in {
+  def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
+      [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
+  > {
+    let DisableEncoding = "$m0";
+  }
+} // End Uses = [EXEC]
+
 //def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
 //def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
 //def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
 //def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
 //def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
 //def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+} // End hasSideEffects

 def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
  (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
@ -979,14 +989,16 @@ defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
 let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
 // No patterns so that the scalar instructions are always selected.
 // The scalar versions will be replaced with vector when needed later.
-defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", []>;
-defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", []>;
-defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
+defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>;
+defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>;
+defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32,
+                              "V_SUB_I32">;

 let Uses = [VCC] in { // Carry-in comes from VCC
-defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
-defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
-defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
+defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>;
+defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>;
+defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32,
+                               "V_SUBB_U32">;
 } // End Uses = [VCC]
 } // End isCommutable = 1, Defs = [VCC]

@ -1403,7 +1415,7 @@ def : Pat <
 /* int_SI_vs_load_input */
 def : Pat<
  (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
-  (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset)
+  (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
 >;

 /* int_SI_export */
@ -1658,16 +1670,30 @@ def : Pat <
   0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
 >;

+/********** ================================ **********/
+/********** Floating point absolute/negative **********/
+/********** ================================ **********/
+
+// Manipulate the sign bit directly, as e.g. using the source negation modifier
+// in V_ADD_F32_e64 $src, 0, [...] does not result in -0.0 for $src == +0.0,
+// breaking the piglit *s-floatBitsToInt-neg* tests
+
+// TODO: Look into not implementing isFNegFree/isFAbsFree for SI, and possibly
+// removing these patterns
+
+def : Pat <
+  (fneg (fabs f32:$src)),
+  (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
+>;
+
 def : Pat <
  (fabs f32:$src),
-  (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
-   1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+  (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff)) /* Clear sign bit */
 >;

 def : Pat <
  (fneg f32:$src),
-  (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
-   0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
+  (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Toggle sign bit */
 >;

 /********** ================== **********/
@ -1794,6 +1820,11 @@ def : Pat <
  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
 >;

+def : Pat <
+  (i32 (zext i1:$src0)),
+  (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
+>;
+
 // 1. Offset as 8bit DWORD immediate
 def : Pat <
  (SIload_constant i128:$sbase, IMM8bitDWORD:$offset),
@ -1809,7 +1840,7 @@ def : Pat <
 // 3. Offset in an 32Bit VGPR
 def : Pat <
  (SIload_constant i128:$sbase, i32:$voff),
-  (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff)
+  (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
 >;

 // The multiplication scales from [0,1] to the unsigned integer range
@ -1970,6 +2001,50 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
 defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
 defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;

+// BUFFER_LOAD_DWORD*, addr64=0
+multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
+                             MUBUF bothen> {
+
+  def : Pat <
+    (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+                                  imm:$offset, 0, 0, imm:$glc, imm:$slc,
+                                  imm:$tfe)),
+    (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+            (as_i1imm $slc), (as_i1imm $tfe))
+  >;
+
+  def : Pat <
+    (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+                                  imm, 1, 0, imm:$glc, imm:$slc,
+                                  imm:$tfe)),
+    (offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
+           (as_i1imm $tfe))
+  >;
+
+  def : Pat <
+    (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+                                  imm:$offset, 0, 1, imm:$glc, imm:$slc,
+                                  imm:$tfe)),
+    (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+           (as_i1imm $slc), (as_i1imm $tfe))
+  >;
+
+  def : Pat <
+    (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset,
+                                  imm, 1, 1, imm:$glc, imm:$slc,
+                                  imm:$tfe)),
+    (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
+            (as_i1imm $tfe))
+  >;
+}
+
+defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
+                         BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
+defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
+                         BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
+defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
+                         BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
+
 //===----------------------------------------------------------------------===//
 // MTBUF Patterns
 //===----------------------------------------------------------------------===//
@ -2057,6 +2132,11 @@ def : Pat <
  (EXTRACT_SUBREG $a, sub0)
 >;

+def : Pat <
+  (i1 (trunc i32:$a)),
+  (V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1)
+>;
+
 // V_ADD_I32_e32/S_ADD_I32 produces carry in VCC/SCC. For the vector
 // case, the sgpr-copies pass will fix this to use the vector version.
 def : Pat <
--- a/contrib/llvm/lib/Target/R600/SIIntrinsics.td
+++ b/contrib/llvm/lib/Target/R600/SIIntrinsics.td
@ -38,6 +38,22 @@ let TargetPrefix = "SI", isTarget = 1 in {
     llvm_i32_ty],   // tfe(imm)
    []>;

+  // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
+  def int_SI_buffer_load_dword : Intrinsic <
+    [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
+    [llvm_anyint_ty,  // rsrc(SGPR)
+     llvm_anyint_ty,  // vaddr(VGPR)
+     llvm_i32_ty,     // soffset(SGPR)
+     llvm_i32_ty,     // inst_offset(imm)
+     llvm_i32_ty,     // offen(imm)
+     llvm_i32_ty,     // idxen(imm)
+     llvm_i32_ty,     // glc(imm)
+     llvm_i32_ty,     // slc(imm)
+     llvm_i32_ty],    // tfe(imm)
+    [IntrReadArgMem]>;
+
+  def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
  class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;

  def int_SI_sample : Sample;
--- a/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
+++ b/contrib/llvm/lib/Target/R600/SILowerControlFlow.cpp
@ -109,6 +109,23 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
  return new SILowerControlFlowPass(tm);
 }

+static bool isDS(unsigned Opcode) {
+  switch(Opcode) {
+  default: return false;
+  case AMDGPU::DS_ADD_U32_RTN:
+  case AMDGPU::DS_SUB_U32_RTN:
+  case AMDGPU::DS_WRITE_B32:
+  case AMDGPU::DS_WRITE_B8:
+  case AMDGPU::DS_WRITE_B16:
+  case AMDGPU::DS_READ_B32:
+  case AMDGPU::DS_READ_I8:
+  case AMDGPU::DS_READ_U8:
+  case AMDGPU::DS_READ_I16:
+  case AMDGPU::DS_READ_U16:
+    return true;
+  }
+}
+
 bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
                                        MachineBasicBlock *To) {

@ -145,7 +162,9 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();

-  if (!shouldSkip(&MBB, &MBB.getParent()->back()))
+  if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType !=
+      ShaderType::PIXEL ||
+      !shouldSkip(&MBB, &MBB.getParent()->back()))
    return;

  MachineBasicBlock::iterator Insert = &MI;
@ -296,9 +315,11 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();

-  // Kill is only allowed in pixel shaders
+  // Kill is only allowed in pixel / geometry shaders
  assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
-         ShaderType::PIXEL);
+         ShaderType::PIXEL ||
+         MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
+         ShaderType::GEOMETRY);

  // Clear this pixel from the exec mask if the operand is negative
  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
@ -431,6 +452,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {

      Next = llvm::next(I);
      MachineInstr &MI = *I;
+      if (isDS(MI.getOpcode())) {
+        NeedM0 = true;
+        NeedWQM = true;
+      }
+
      switch (MI.getOpcode()) {
        default: break;
        case AMDGPU::SI_IF:
@ -491,14 +517,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
          IndirectDst(MI);
          break;

-        case AMDGPU::DS_READ_B32:
-          NeedWQM = true;
-          // Fall through
-        case AMDGPU::DS_WRITE_B32:
-        case AMDGPU::DS_ADD_U32_RTN:
-          NeedM0 = true;
-          break;
-
        case AMDGPU::V_INTERP_P1_F32:
        case AMDGPU::V_INTERP_P2_F32:
        case AMDGPU::V_INTERP_MOV_F32:
@ -517,7 +535,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
            AMDGPU::M0).addImm(0xffffffff);
  }

-  if (NeedWQM && MFI->ShaderType != ShaderType::COMPUTE) {
+  if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) {
    MachineBasicBlock &MBB = MF.front();
    BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
            AMDGPU::EXEC).addReg(AMDGPU::EXEC);
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@ -1181,16 +1181,23 @@ X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
                                    unsigned Scale, SMLoc Start, SMLoc End,
                                    unsigned Size, StringRef Identifier,
                                    InlineAsmIdentifierInfo &Info){
-  if (isa<MCSymbolRefExpr>(Disp)) {
-    // If this is not a VarDecl then assume it is a FuncDecl or some other label
-    // reference.  We need an 'r' constraint here, so we need to create register
-    // operand to ensure proper matching.  Just pick a GPR based on the size of
-    // a pointer.
-    if (!Info.IsVarDecl) {
-      unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
-      return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
-                                   SMLoc(), Identifier, Info.OpDecl);
-    }
+  // If this is not a VarDecl then assume it is a FuncDecl or some other label
+  // reference.  We need an 'r' constraint here, so we need to create register
+  // operand to ensure proper matching.  Just pick a GPR based on the size of
+  // a pointer.
+  if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
+    unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+    return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
+                                 SMLoc(), Identifier, Info.OpDecl);
+  }
+
+  // We either have a direct symbol reference, or an offset from a symbol.  The
+  // parser always puts the symbol on the LHS, so look there for size
+  // calculation purposes.
+  const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
+  bool IsSymRef =
+      isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
+  if (IsSymRef) {
    if (!Size) {
      Size = Info.Type * 8; // Size is in terms of bits in this context.
      if (Size)
@ -1312,10 +1319,15 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
          if (getParser().parsePrimaryExpr(Val, End))
            return Error(Tok.getLoc(), "Unexpected identifier!");
        } else {
-          InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
-          if (ParseIntelIdentifier(Val, Identifier, Info,
-                                   /*Unevaluated=*/false, End))
-            return true;
+          // This is a dot operator, not an adjacent identifier.
+          if (Identifier.find('.') != StringRef::npos) {
+            return false;
+          } else {
+            InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
+            if (ParseIntelIdentifier(Val, Identifier, Info,
+                                     /*Unevaluated=*/false, End))
+              return true;
+          }
        }
        SM.onIdentifierExpr(Val, Identifier);
        UpdateLocLex = false;
@ -1366,7 +1378,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
  if (ParseIntelExpression(SM, End))
    return 0;

-  const MCExpr *Disp;
+  const MCExpr *Disp = 0;
  if (const MCExpr *Sym = SM.getSym()) {
    // A symbolic displacement.
    Disp = Sym;
@ -1374,13 +1386,20 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
      RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
                                 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
                                 End);
-  } else {
-    // An immediate displacement only.   
-    Disp = MCConstantExpr::Create(SM.getImm(), getContext());
  }

-  // Parse the dot operator (e.g., [ebx].foo.bar).
-  if (Tok.getString().startswith(".")) {
+  if (SM.getImm() || !Disp) {
+    const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
+    if (Disp)
+      Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
+    else
+      Disp = Imm;  // An immediate displacement only.
+  }
+
+  // Parse struct field access.  Intel requires a dot, but MSVC doesn't.  MSVC
+  // will in fact do global lookup the field name inside all global typedefs,
+  // but we don't emulate that.
+  if (Tok.getString().find('.') != StringRef::npos) {
    const MCExpr *NewDisp;
    if (ParseIntelDotOperator(Disp, NewDisp))
      return 0;
@ -1532,8 +1551,10 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
  else
    return Error(Tok.getLoc(), "Non-constant offsets are not supported!");

-  // Drop the '.'.
-  StringRef DotDispStr = Tok.getString().drop_front(1);
+  // Drop the optional '.'.
+  StringRef DotDispStr = Tok.getString();
+  if (DotDispStr.startswith("."))
+    DotDispStr = DotDispStr.drop_front(1);

  // .Imm gets lexed as a real.
  if (Tok.is(AsmToken::Real)) {
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@ -1065,6 +1065,7 @@ static int readSIB(struct InternalInstruction* insn) {

  switch (base) {
  case 0x5:
+  case 0xd:
    switch (modFromModRM(insn->modRM)) {
    case 0x0:
      insn->eaDisplacement = EA_DISP_32;
@ -1072,13 +1073,11 @@ static int readSIB(struct InternalInstruction* insn) {
      break;
    case 0x1:
      insn->eaDisplacement = EA_DISP_8;
-      insn->sibBase = (insn->addressSize == 4 ?
-                       SIB_BASE_EBP : SIB_BASE_RBP);
+      insn->sibBase = (SIBBase)(sibBaseBase + base);
      break;
    case 0x2:
      insn->eaDisplacement = EA_DISP_32;
-      insn->sibBase = (insn->addressSize == 4 ?
-                       SIB_BASE_EBP : SIB_BASE_RBP);
+      insn->sibBase = (SIBBase)(sibBaseBase + base);
      break;
    case 0x3:
      debug("Cannot have Mod = 0b11 and a SIB byte");
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@ -65,6 +65,17 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {

  // Exceptions handling
  ExceptionsType = ExceptionHandling::DwarfCFI;
+
+  // FIXME: this should not depend on the target OS version, but on the ld64
+  // version in use.  From at least >= ld64-97.17 (Xcode 3.2.6) the abs-ified
+  // FDE relocs may be used.
+  DwarfFDESymbolsUseAbsDiff = T.isMacOSX() && !T.isMacOSXVersionLT(10, 6);
+
+  // old assembler lacks some directives
+  // FIXME: this should really be a check on the assembler characteristics
+  // rather than OS version
+  if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
+    HasWeakDefCanBeHiddenDirective = false;
 }

 X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@ -393,9 +393,11 @@ bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
  case 'k': // Print SImode register
    Reg = getX86SubSuperRegister(Reg, MVT::i32);
    break;
-  case 'q': // Print DImode register
-    // FIXME: gcc will actually print e instead of r for 32-bit.
-    Reg = getX86SubSuperRegister(Reg, MVT::i64);
+  case 'q':
+    // Print 64-bit register names if 64-bit integer registers are available.
+    // Otherwise, print 32-bit register names.
+    MVT::SimpleValueType Ty = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+    Reg = getX86SubSuperRegister(Reg, Ty);
    break;
  }

--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -15226,9 +15226,15 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
    MBB->addSuccessor(EndMBB);
  }

+  // Make sure the last operand is EFLAGS, which gets clobbered by the branch
+  // that was just emitted, but clearly shouldn't be "saved".
+  assert((MI->getNumOperands() <= 3 ||
+          !MI->getOperand(MI->getNumOperands() - 1).isReg() ||
+          MI->getOperand(MI->getNumOperands() - 1).getReg() == X86::EFLAGS)
+         && "Expected last argument to be EFLAGS");
  unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
  // In the XMM save block, save all the XMM argument registers.
-  for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
+  for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
    int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
    MachineMemOperand *MMO =
      F->getMachineMemOperand(
@ -17577,12 +17583,30 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
          // FIXME: need symbolic constants for these magic numbers.
          // See X86ATTInstPrinter.cpp:printSSECC().
          unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
-          SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
+          SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, CMP00.getValueType(),
+                                              CMP00, CMP01,
                                              DAG.getConstant(x86cc, MVT::i8));
-          SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
-                                              OnesOrZeroesF);
-          SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
-                                      DAG.getConstant(1, MVT::i32));
+
+          MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
+
+          if (is64BitFP && !Subtarget->is64Bit()) {
+            // On a 32-bit target, we cannot bitcast the 64-bit float to a
+            // 64-bit integer, since that's not a legal type. Since
+            // OnesOrZeroesF is all ones of all zeroes, we don't need all the
+            // bits, but can do this little dance to extract the lowest 32 bits
+            // and work with those going forward.
+            SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
+                                           OnesOrZeroesF);
+            SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32,
+                                           Vector64);
+            OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
+                                        Vector32, DAG.getIntPtrConstant(0));
+            IntVT = MVT::i32;
+          }
+
+          SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, OnesOrZeroesF);
+          SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
+                                      DAG.getConstant(1, IntVT));
          SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
          return OneBitOfTruth;
        }
--- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@ -72,7 +72,7 @@ def ADJCALLSTACKUP64   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),


 // x86-64 va_start lowering magic.
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, Defs = [EFLAGS] in {
 def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
                              (outs),
                              (ins GR8:$al,
@ -81,7 +81,8 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
                              "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
                              [(X86vastart_save_xmm_regs GR8:$al,
                                                         imm:$regsavefi,
-                                                         imm:$offset)]>;
+                                                         imm:$offset),
+                               (implicit EFLAGS)]>;

 // The VAARG_64 pseudo-instruction takes the address of the va_list,
 // and places the address of the next argument into a register.
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@ -25,11 +25,13 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
    if (isConstant) return true;

    // If all elts are the same, we can extract it and use any of the values.
-    Constant *Op0 = C->getAggregateElement(0U);
-    for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i)
-      if (C->getAggregateElement(i) != Op0)
-        return false;
-    return true;
+    if (Constant *Op0 = C->getAggregateElement(0U)) {
+      for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e;
+           ++i)
+        if (C->getAggregateElement(i) != Op0)
+          return false;
+      return true;
+    }
  }
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@ -1088,9 +1088,8 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
                           L, SCEV::FlagAnyWrap));
  { // Limit the lifetime of SCEVExpander.
    SCEVExpander Expander(*SE, "reroll");
-    PHINode *NewIV =
-      cast<PHINode>(Expander.expandCodeFor(H, IV->getType(),
-                                           Header->begin()));
+    Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
+
    for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
         JE = BaseUseSet.end(); J != JE; ++J)
      (*J)->replaceUsesOfWith(IV, NewIV);
@ -1101,20 +1100,23 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
        if (Inc == 1)
          ICSCEV =
            SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
-        Value *IC;
-        if (isa<SCEVConstant>(ICSCEV)) {
-          IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI);
+        // Iteration count SCEV minus 1
+        const SCEV *ICMinus1SCEV =
+          SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
+
+        Value *ICMinus1; // Iteration count minus 1
+        if (isa<SCEVConstant>(ICMinus1SCEV)) {
+          ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
        } else {
          BasicBlock *Preheader = L->getLoopPreheader();
          if (!Preheader)
            Preheader = InsertPreheaderForLoop(L, this);

-          IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(),
-                                      Preheader->getTerminator());
+          ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
+                                            Preheader->getTerminator());
        }
 
-        Value *NewIVNext = NewIV->getIncomingValueForBlock(Header); 
-        Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC,
+        Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1,
                                   "exitcond");
        BI->setCondition(Cond);

--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@ -3390,6 +3390,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
    int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
    if (NewBaseOffset / Factor != Base.BaseOffset)
      continue;
+    // If the offset will be truncated at this use, check that it is in bounds.
+    if (!IntTy->isPointerTy() &&
+        !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
+      continue;

    // Check that multiplying with the use offset doesn't overflow.
    int64_t Offset = LU.MinOffset;
@ -3398,6 +3402,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
    Offset = (uint64_t)Offset * Factor;
    if (Offset / Factor != LU.MinOffset)
      continue;
+    // If the offset will be truncated at this use, check that it is in bounds.
+    if (!IntTy->isPointerTy() &&
+        !ConstantInt::isValueValidForType(IntTy, Offset))
+      continue;

    Formula F = Base;
    F.BaseOffset = NewBaseOffset;
@ -3432,6 +3440,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
      F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
      if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
        continue;
+      // If the offset will be truncated, check that it is in bounds.
+      if (!IntTy->isPointerTy() &&
+          !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
+        continue;
    }

    // If we make it here and it's legal, add it.
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@ -32,6 +32,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/Constants.h"
@ -70,6 +71,7 @@ namespace {
      AU.addRequired<DominatorTree>();
      AU.addRequired<LoopInfo>();
      AU.addPreservedID(LoopSimplifyID);
+      AU.addPreserved<AliasAnalysis>();
      AU.addPreserved<ScalarEvolution>();
    }
  private:
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@ -4191,13 +4191,22 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
        continue;
      }

-      // Process instructions only once (termination).
+      // Process instructions only once (termination). Each reduction cycle
+      // value must only be used once, except by phi nodes and min/max
+      // reductions which are represented as a cmp followed by a select.
+      ReductionInstDesc IgnoredVal(false, 0);
      if (VisitedInsts.insert(Usr)) {
        if (isa<PHINode>(Usr))
          PHIs.push_back(Usr);
        else
          NonPHIs.push_back(Usr);
-      }
+      } else if (!isa<PHINode>(Usr) &&
+                 ((!isa<FCmpInst>(Usr) &&
+                   !isa<ICmpInst>(Usr) &&
+                   !isa<SelectInst>(Usr)) ||
+                  !isMinMaxSelectCmpPattern(Usr, IgnoredVal).IsReduction))
+        return false;
+
      // Remember that we completed the cycle.
      if (Usr == Phi)
        FoundStartPHI = true;
--- a/contrib/llvm/patches/patch-r262809-clang-r203007-destructor-calling-conv.diff
+++ b/contrib/llvm/patches/patch-r262809-clang-r203007-destructor-calling-conv.diff
@ -1,61 +0,0 @@
-Pull in r203007 from upstream clang trunk (by Rafael Espindola):
-
-  Don't produce an alias between destructors with different calling conventions.
-
-  Fixes pr19007.
-
-Introduced here: http://svn.freebsd.org/changeset/base/262809
-
-Index: tools/clang/lib/CodeGen/CGCXX.cpp
-===================================================================
--- tools/clang/lib/CodeGen/CGCXX.cpp
-+++ tools/clang/lib/CodeGen/CGCXX.cpp
-@@ -92,7 +92,13 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(c
-   if (!ClassLayout.getBaseClassOffset(UniqueBase).isZero())
-     return true;
- 
-+  // Give up if the calling conventions don't match. We could update the call,
-+  // but it is probably not worth it.
-   const CXXDestructorDecl *BaseD = UniqueBase->getDestructor();
-+  if (BaseD->getType()->getAs<FunctionType>()->getCallConv() !=
-+      D->getType()->getAs<FunctionType>()->getCallConv())
-+    return true;
-+
-   return TryEmitDefinitionAsAlias(GlobalDecl(D, Dtor_Base),
-                                   GlobalDecl(BaseD, Dtor_Base),
-                                   false);
-Index: tools/clang/test/CodeGenCXX/ctor-dtor-alias.cpp
-===================================================================
--- tools/clang/test/CodeGenCXX/ctor-dtor-alias.cpp
-+++ tools/clang/test/CodeGenCXX/ctor-dtor-alias.cpp
-@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 %s -triple x86_64-linux -emit-llvm -o - -mconstructor-aliases -O1 -disable-llvm-optzns | FileCheck %s
-// RUN: %clang_cc1 %s -triple x86_64-linux -emit-llvm -o - -mconstructor-aliases | FileCheck --check-prefix=NOOPT %s
-+// RUN: %clang_cc1 %s -triple i686-linux -emit-llvm -o - -mconstructor-aliases -O1 -disable-llvm-optzns | FileCheck %s
-+// RUN: %clang_cc1 %s -triple i686-linux -emit-llvm -o - -mconstructor-aliases | FileCheck --check-prefix=NOOPT %s
- 
- // RUN: %clang_cc1 -cc1 -triple x86_64--netbsd -emit-llvm \
- // RUN: -mconstructor-aliases -O2 %s -o - | FileCheck --check-prefix=CHECK-RAUW %s
-@@ -133,6 +133,22 @@ namespace test8 {
-   zed foo;
- }
- 
-+namespace test9 {
-+struct foo {
-+  __attribute__((stdcall)) ~foo() {
-+  }
-+};
-+
-+struct bar : public foo {};
-+
-+void zed() {
-+  // Test that we produce a call to bar's destructor. We used to call foo's, but
-+  // it has a different calling conversion.
-+  // CHECK-DAG: call void @_ZN5test93barD2Ev
-+  bar ptr;
-+}
-+}
-+
- // CHECK-RAUW: @_ZTV1C = linkonce_odr unnamed_addr constant [4 x i8*] [{{[^@]*}}@_ZTI1C {{[^@]*}}@_ZN1CD2Ev {{[^@]*}}@_ZN1CD0Ev {{[^@]*}}]
- // r194296 replaced C::~C with B::~B without emitting the later.
- 
--- a/contrib/llvm/tools/clang/include/clang/Driver/Driver.h
+++ b/contrib/llvm/tools/clang/include/clang/Driver/Driver.h
@ -415,6 +415,10 @@ public:
                                bool &HadExtra);
 };

+/// \return True if the last defined optimization level is -Ofast.
+/// And False otherwise.
+bool isOptimizationLevelFast(const llvm::opt::ArgList &Args);
+
 } // end namespace driver
 } // end namespace clang

--- a/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h
+++ b/contrib/llvm/tools/clang/include/clang/Driver/ToolChain.h
@ -303,7 +303,7 @@ public:
  /// AddFastMathRuntimeIfAvailable - If a runtime library exists that sets
  /// global flags for unsafe floating point math, add it and return true.
  ///
-  /// This checks for presence of the -ffast-math or -funsafe-math flags.
+  /// This checks for presence of the -Ofast, -ffast-math or -funsafe-math flags.
  virtual bool
  AddFastMathRuntimeIfAvailable(const llvm::opt::ArgList &Args,
                                llvm::opt::ArgStringList &CmdArgs) const;
--- a/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp
@ -32,12 +32,23 @@ using namespace clang::comments;

 namespace  {
  // Colors used for various parts of the AST dump
+  // Do not use bold yellow for any text.  It is hard to read on white screens.

  struct TerminalColor {
    raw_ostream::Colors Color;
    bool Bold;
  };

+  // Red           - CastColor
+  // Green         - TypeColor
+  // Bold Green    - DeclKindNameColor, UndeserializedColor
+  // Yellow        - AddressColor, LocationColor
+  // Blue          - CommentColor, NullColor, IndentColor
+  // Bold Blue     - AttrColor
+  // Bold Magenta  - StmtColor
+  // Cyan          - ValueKindColor, ObjectKindColor
+  // Bold Cyan     - ValueColor, DeclNameColor
+
  // Decl kind names (VarDecl, FunctionDecl, etc)
  static const TerminalColor DeclKindNameColor = { raw_ostream::GREEN, true };
  // Attr names (CleanupAttr, GuardedByAttr, etc)
@ -45,7 +56,7 @@ namespace  {
  // Statement names (DeclStmt, ImplicitCastExpr, etc)
  static const TerminalColor StmtColor = { raw_ostream::MAGENTA, true };
  // Comment names (FullComment, ParagraphComment, TextComment, etc)
-  static const TerminalColor CommentColor = { raw_ostream::YELLOW, true };
+  static const TerminalColor CommentColor = { raw_ostream::BLUE, false };

  // Type names (int, float, etc, plus user defined types)
  static const TerminalColor TypeColor = { raw_ostream::GREEN, false };
--- a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
@ -5089,16 +5089,15 @@ bool RecordExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E) {
    if (!Result.isUninit())
      return true;

-    if (ZeroInit)
-      return ZeroInitialization(E);
-
-    const CXXRecordDecl *RD = FD->getParent();
-    if (RD->isUnion())
-      Result = APValue((FieldDecl*)0);
-    else
-      Result = APValue(APValue::UninitStruct(), RD->getNumBases(),
-                       std::distance(RD->field_begin(), RD->field_end()));
-    return true;
+    // We can get here in two different ways:
+    //  1) We're performing value-initialization, and should zero-initialize
+    //     the object, or
+    //  2) We're performing default-initialization of an object with a trivial
+    //     constexpr default constructor, in which case we should start the
+    //     lifetimes of all the base subobjects (there can be no data member
+    //     subobjects in this case) per [basic.life]p1.
+    // Either way, ZeroInitialization is appropriate.
+    return ZeroInitialization(E);
  }

  const FunctionDecl *Definition = 0;
@ -5578,19 +5577,9 @@ bool ArrayExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E,
    if (HadZeroInit)
      return true;

-    if (ZeroInit) {
-      ImplicitValueInitExpr VIE(Type);
-      return EvaluateInPlace(*Value, Info, Subobject, &VIE);
-    }
-
-    const CXXRecordDecl *RD = FD->getParent();
-    if (RD->isUnion())
-      *Value = APValue((FieldDecl*)0);
-    else
-      *Value =
-          APValue(APValue::UninitStruct(), RD->getNumBases(),
-                  std::distance(RD->field_begin(), RD->field_end()));
-    return true;
+    // See RecordExprEvaluator::VisitCXXConstructExpr for explanation.
+    ImplicitValueInitExpr VIE(Type);
+    return EvaluateInPlace(*Value, Info, Subobject, &VIE);
  }

  const FunctionDecl *Definition = 0;
--- a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp
@ -709,9 +709,11 @@ void StmtPrinter::VisitObjCIvarRefExpr(ObjCIvarRefExpr *Node) {
 void StmtPrinter::VisitObjCPropertyRefExpr(ObjCPropertyRefExpr *Node) {
  if (Node->isSuperReceiver())
    OS << "super.";
-  else if (Node->getBase()) {
+  else if (Node->isObjectReceiver() && Node->getBase()) {
    PrintExpr(Node->getBase());
    OS << ".";
+  } else if (Node->isClassReceiver() && Node->getClassReceiver()) {
+    OS << Node->getClassReceiver()->getName() << ".";
  }

  if (Node->isImplicitProperty())
--- a/contrib/llvm/tools/clang/lib/Analysis/Consumed.cpp
+++ b/contrib/llvm/tools/clang/lib/Analysis/Consumed.cpp
@ -605,13 +605,25 @@ void ConsumedStmtVisitor::VisitBinaryOperator(const BinaryOperator *BinOp) {
  }
 }

+static bool isStdNamespace(const DeclContext *DC) {
+  if (!DC->isNamespace()) return false;
+  while (DC->getParent()->isNamespace())
+    DC = DC->getParent();
+  const NamespaceDecl *ND = dyn_cast<NamespaceDecl>(DC);
+
+  return ND && ND->getName() == "std" &&
+         ND->getDeclContext()->isTranslationUnit();
+}
+
 void ConsumedStmtVisitor::VisitCallExpr(const CallExpr *Call) {
  if (const FunctionDecl *FunDecl =
    dyn_cast_or_null<FunctionDecl>(Call->getDirectCallee())) {
    
    // Special case for the std::move function.
    // TODO: Make this more specific. (Deferred)
-    if (FunDecl->getNameAsString() == "move") {
+    if (Call->getNumArgs() == 1 &&
+        FunDecl->getNameAsString() == "move" &&
+        isStdNamespace(FunDecl->getDeclContext())) {
      forwardInfo(Call->getArg(0), Call);
      return;
    }
--- a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp
@ -358,6 +358,16 @@ public:
  LinuxTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
    this->UserLabelPrefix = "";
    this->WIntType = TargetInfo::UnsignedInt;
+
+    switch (Triple.getArch()) {
+    default:
+      break;
+    case llvm::Triple::ppc:
+    case llvm::Triple::ppc64:
+    case llvm::Triple::ppc64le:
+      this->MCountName = "_mcount";
+      break;
+    }
  }

  virtual const char *getStaticInitSectionSpecifier() const {
@ -1271,7 +1281,7 @@ public:
    LongLongAlign = 32;
    SuitableAlign = 128;
    DescriptionString = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
-                        "i64:32:64-f32:32:32-f64:64:64-v128:128:128-n32";
+                        "i64:32:64-f32:32:32-f64:32:64-v128:128:128-n32";
  }
  virtual BuiltinVaListKind getBuiltinVaListKind() const {
    return TargetInfo::CharPtrBuiltinVaList;
--- a/contrib/llvm/tools/clang/lib/Basic/Version.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/Version.cpp
@ -36,7 +36,7 @@ std::string getClangRepositoryPath() {

  // If the SVN_REPOSITORY is empty, try to use the SVN keyword. This helps us
  // pick up a tag in an SVN export, for example.
-  StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_34/final/lib/Basic/Version.cpp $");
+  StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_34/dot1-final/lib/Basic/Version.cpp $");
  if (URL.empty()) {
    URL = SVNRepository.slice(SVNRepository.find(':'),
                              SVNRepository.find("/lib/Basic"));
--- a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp
@ -2055,3 +2055,7 @@ std::pair<unsigned, unsigned> Driver::getIncludeExcludeOptionFlagMasks() const {

  return std::make_pair(IncludedFlagsBitmask, ExcludedFlagsBitmask);
 }
+
+bool clang::driver::isOptimizationLevelFast(const llvm::opt::ArgList &Args) {
+  return Args.hasFlag(options::OPT_Ofast, options::OPT_O_Group, false);
+}
--- a/contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp
@ -430,16 +430,19 @@ void ToolChain::AddCCKextLibArgs(const ArgList &Args,

 bool ToolChain::AddFastMathRuntimeIfAvailable(const ArgList &Args,
                                              ArgStringList &CmdArgs) const {
-  // Check if -ffast-math or -funsafe-math is enabled.
-  Arg *A = Args.getLastArg(options::OPT_ffast_math,
-                           options::OPT_fno_fast_math,
-                           options::OPT_funsafe_math_optimizations,
-                           options::OPT_fno_unsafe_math_optimizations);
-
-  if (!A || A->getOption().getID() == options::OPT_fno_fast_math ||
-      A->getOption().getID() == options::OPT_fno_unsafe_math_optimizations)
-    return false;
+  // Do not check for -fno-fast-math or -fno-unsafe-math when -Ofast passed
+  // (to keep the linker options consistent with gcc and clang itself).
+  if (!isOptimizationLevelFast(Args)) {
+    // Check if -ffast-math or -funsafe-math.
+    Arg *A =
+        Args.getLastArg(options::OPT_ffast_math, options::OPT_fno_fast_math,
+                        options::OPT_funsafe_math_optimizations,
+                        options::OPT_fno_unsafe_math_optimizations);

+    if (!A || A->getOption().getID() == options::OPT_fno_fast_math ||
+        A->getOption().getID() == options::OPT_fno_unsafe_math_optimizations)
+      return false;
+  }
  // If crtfastmath.o exists add it to the arguments.
  std::string Path = GetFilePath("crtfastmath.o");
  if (Path == "crtfastmath.o") // Not found.
--- a/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp
@ -1650,6 +1650,21 @@ bool Generic_GCC::isPICDefaultForced() const {
  return false;
 }

+void Generic_GCC::addClangTargetOptions(const ArgList &DriverArgs,
+                                        ArgStringList &CC1Args) const {
+  const Generic_GCC::GCCVersion &V = GCCInstallation.getVersion();
+  bool UseInitArrayDefault = 
+      getTriple().getArch() == llvm::Triple::aarch64 ||
+      (getTriple().getOS() == llvm::Triple::Linux && (
+         !V.isOlderThan(4, 7, 0) ||
+         getTriple().getEnvironment() == llvm::Triple::Android));
+
+  if (DriverArgs.hasFlag(options::OPT_fuse_init_array,
+                         options::OPT_fno_use_init_array,
+                         UseInitArrayDefault))
+    CC1Args.push_back("-fuse-init-array");
+}
+
 /// Hexagon Toolchain

 std::string Hexagon_TC::GetGnuDir(const std::string &InstalledDir) {
@ -2566,19 +2581,6 @@ Tool *Linux::buildAssembler() const {
  return new tools::gnutools::Assemble(*this);
 }

-void Linux::addClangTargetOptions(const ArgList &DriverArgs,
-                                  ArgStringList &CC1Args) const {
-  const Generic_GCC::GCCVersion &V = GCCInstallation.getVersion();
-  bool UseInitArrayDefault =
-      !V.isOlderThan(4, 7, 0) ||
-      getTriple().getArch() == llvm::Triple::aarch64 ||
-      getTriple().getEnvironment() == llvm::Triple::Android;
-  if (DriverArgs.hasFlag(options::OPT_fuse_init_array,
-                         options::OPT_fno_use_init_array,
-                         UseInitArrayDefault))
-    CC1Args.push_back("-fuse-init-array");
-}
-
 std::string Linux::computeSysRoot() const {
  if (!getDriver().SysRoot.empty())
    return getDriver().SysRoot;
--- a/contrib/llvm/tools/clang/lib/Driver/ToolChains.h
+++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains.h
@ -164,6 +164,8 @@ public:
  virtual bool isPICDefault() const;
  virtual bool isPIEDefault() const;
  virtual bool isPICDefaultForced() const;
+  virtual void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
+                                     llvm::opt::ArgStringList &CC1Args) const;

 protected:
  virtual Tool *getTool(Action::ActionClass AC) const;
@ -584,8 +586,6 @@ public:
  virtual void
  AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                            llvm::opt::ArgStringList &CC1Args) const;
-  virtual void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                                     llvm::opt::ArgStringList &CC1Args) const;
  virtual void
  AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                               llvm::opt::ArgStringList &CC1Args) const;
--- a/contrib/llvm/tools/clang/lib/Driver/Tools.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/Tools.cpp
@ -2003,13 +2003,6 @@ static void SplitDebugInfo(const ToolChain &TC, Compilation &C,
  C.addCommand(new Command(JA, T, Exec, StripArgs));
 }

-static bool isOptimizationLevelFast(const ArgList &Args) {
-  if (Arg *A = Args.getLastArg(options::OPT_O_Group))
-    if (A->getOption().matches(options::OPT_Ofast))
-      return true;
-  return false;
-}
-
 /// \brief Vectorize at all optimization levels greater than 1 except for -Oz.
 static bool shouldEnableVectorizerAtOLevel(const ArgList &Args) {
  if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
@ -4621,8 +4614,14 @@ void darwin::Assemble::ConstructJob(Compilation &C, const JobAction &JA,

  // If -no_integrated_as is used add -Q to the darwin assember driver to make
  // sure it runs its system assembler not clang's integrated assembler.
-  if (Args.hasArg(options::OPT_no_integrated_as))
-    CmdArgs.push_back("-Q");
+  // Applicable to darwin11+ and Xcode 4+.  darwin<10 lacked integrated-as.
+  // FIXME: at run-time detect assembler capabilities or rely on version
+  // information forwarded by -target-assembler-version (future)
+  if (Args.hasArg(options::OPT_no_integrated_as)) {
+    const llvm::Triple& t(getToolChain().getTriple());
+    if (!(t.isMacOSX() && t.isMacOSXVersionLT(10, 7)))
+      CmdArgs.push_back("-Q");
+  }

  // Forward -g, assuming we are dealing with an actual assembly file.
  if (SourceAction->getType() == types::TY_Asm ||
--- a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp
@ -5836,17 +5836,16 @@ static inline bool VariableCanNeverBeAConstantExpression(VarDecl *Var,
  assert(DefVD);
  if (DefVD->isWeak()) return false;
  EvaluatedStmt *Eval = DefVD->ensureEvaluatedStmt();
-  
+
  Expr *Init = cast<Expr>(Eval->Value);

  if (Var->getType()->isDependentType() || Init->isValueDependent()) {
-    if (!Init->isValueDependent())
-      return !DefVD->checkInitIsICE();
-    // FIXME: We might still be able to do some analysis of Init here
-    // to conclude that even in a dependent setting, Init can never
-    // be a constexpr - but for now admit agnosticity.
+    // FIXME: Teach the constant evaluator to deal with the non-dependent parts
+    // of value-dependent expressions, and use it here to determine whether the
+    // initializer is a potential constant expression.
    return false;
-  } 
+  }
+
  return !IsVariableAConstantExpression(Var, Context); 
 }

--- a/etc/mtree/BSD.include.dist
+++ b/etc/mtree/BSD.include.dist
@ -92,7 +92,7 @@
        ..
    ..
    clang
-        3.4
+        3.4.1
        ..
    ..
    crypto
--- a/lib/clang/include/Makefile
+++ b/lib/clang/include/Makefile
@ -8,7 +8,7 @@ LLVM_SRCS= ${.CURDIR}/../../../contrib/llvm

 .PATH: ${LLVM_SRCS}/tools/clang/lib/Headers

-INCSDIR=${INCLUDEDIR}/clang/3.4
+INCSDIR=${INCLUDEDIR}/clang/3.4.1

 INCS=	__wmmintrin_aes.h \
 	__wmmintrin_pclmul.h \
--- a/lib/clang/include/clang/Basic/Version.inc
+++ b/lib/clang/include/clang/Basic/Version.inc
@ -1,10 +1,11 @@
 /* $FreeBSD$ */

-#define	CLANG_VERSION		3.4
-#define	CLANG_VERSION_MAJOR	3
-#define	CLANG_VERSION_MINOR	4
+#define	CLANG_VERSION			3.4.1
+#define	CLANG_VERSION_MAJOR		3
+#define	CLANG_VERSION_MINOR		4
+#define	CLANG_VERSION_PATCHLEVEL	1

-#define	CLANG_VENDOR		"FreeBSD "
-#define	CLANG_VENDOR_SUFFIX	" 20140216"
+#define	CLANG_VENDOR			"FreeBSD "
+#define	CLANG_VENDOR_SUFFIX		" 20140512"

-#define	SVN_REVISION		"197956"
+#define	SVN_REVISION			"208032"
--- a/lib/clang/include/llvm/Config/config.h
+++ b/lib/clang/include/llvm/Config/config.h
@ -8,9 +8,6 @@
 /* Get __FreeBSD_version. */
 #include <osreldate.h>

-/* Define if building universal (internal helper macro) */
-/* #undef AC_APPLE_UNIVERSAL_BUILD */
-
 /* Bug report URL. */
 #define BUG_REPORT_URL "http://llvm.org/bugs/"

@ -634,7 +631,7 @@
 /* #undef LLVM_PATH_XDOT */

 /* Installation prefix directory */
-#define LLVM_PREFIX ""
+#define LLVM_PREFIX "/usr"

 /* Define if we have the Intel JIT API runtime support library */
 #define LLVM_USE_INTEL_JITEVENTS 0
@ -648,6 +645,9 @@
 /* Minor version of the LLVM API */
 #define LLVM_VERSION_MINOR 4

+/* Patch version of the LLVM API */
+#define LLVM_VERSION_PATCH 1
+
 /* Define if the OS needs help to load dependent libraries for dlopen(). */
 #define LTDL_DLOPEN_DEPLIBS 1

@ -675,16 +675,13 @@
 #define PACKAGE_NAME "LLVM"

 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "LLVM 3.4"
+#define PACKAGE_STRING "LLVM 3.4.1"

 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "llvm"

-/* Define to the home page for this package. */
-/* #undef PACKAGE_URL */
-
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "3.4"
+#define PACKAGE_VERSION "3.4.1"

 /* Define as the return type of signal handlers (`int' or `void'). */
 #define RETSIGTYPE void
@ -707,18 +704,6 @@
 /* Type of 1st arg on ELM Callback */
 /* #undef WIN32_ELMCB_PCSTR */

-/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
-   significant byte first (like Motorola and SPARC, unlike Intel). */
-#if defined AC_APPLE_UNIVERSAL_BUILD
-# if defined __BIG_ENDIAN__
-#  define WORDS_BIGENDIAN 1
-# endif
-#else
-# ifndef WORDS_BIGENDIAN
-/* #  undef WORDS_BIGENDIAN */
-# endif
-#endif
-
 /* Define to empty if `const' does not conform to ANSI C. */
 /* #undef const */

--- a/lib/clang/include/llvm/Config/llvm-config.h
+++ b/lib/clang/include/llvm/Config/llvm-config.h
@ -109,7 +109,7 @@
 /* #undef LLVM_PATH_XDOT_PY */

 /* Installation prefix directory */
-#define LLVM_PREFIX ""
+#define LLVM_PREFIX "/usr"

 /* Define if we have the Intel JIT API runtime support library */
 #define LLVM_USE_INTEL_JITEVENTS 0
--- a/tools/build/mk/OptionalObsoleteFiles.inc
+++ b/tools/build/mk/OptionalObsoleteFiles.inc
@ -488,41 +488,41 @@ OLD_FILES+=usr/bin/clang++
 OLD_FILES+=usr/bin/clang-cpp
 OLD_FILES+=usr/bin/clang-tblgen
 OLD_FILES+=usr/bin/tblgen
-OLD_FILES+=usr/include/clang/3.4/__wmmintrin_aes.h
-OLD_FILES+=usr/include/clang/3.4/__wmmintrin_pclmul.h
-OLD_FILES+=usr/include/clang/3.4/altivec.h
-OLD_FILES+=usr/include/clang/3.4/ammintrin.h
-OLD_FILES+=usr/include/clang/3.4/arm_neon.h
-OLD_FILES+=usr/include/clang/3.4/avx2intrin.h
-OLD_FILES+=usr/include/clang/3.4/avxintrin.h
-OLD_FILES+=usr/include/clang/3.4/bmi2intrin.h
-OLD_FILES+=usr/include/clang/3.4/bmiintrin.h
-OLD_FILES+=usr/include/clang/3.4/cpuid.h
-OLD_FILES+=usr/include/clang/3.4/emmintrin.h
-OLD_FILES+=usr/include/clang/3.4/f16cintrin.h
-OLD_FILES+=usr/include/clang/3.4/fma4intrin.h
-OLD_FILES+=usr/include/clang/3.4/fmaintrin.h
-OLD_FILES+=usr/include/clang/3.4/immintrin.h
-OLD_FILES+=usr/include/clang/3.4/lzcntintrin.h
-OLD_FILES+=usr/include/clang/3.4/mm3dnow.h
-OLD_FILES+=usr/include/clang/3.4/mm_malloc.h
-OLD_FILES+=usr/include/clang/3.4/mmintrin.h
-OLD_FILES+=usr/include/clang/3.4/module.map
-OLD_FILES+=usr/include/clang/3.4/nmmintrin.h
-OLD_FILES+=usr/include/clang/3.4/pmmintrin.h
-OLD_FILES+=usr/include/clang/3.4/popcntintrin.h
-OLD_FILES+=usr/include/clang/3.4/prfchwintrin.h
-OLD_FILES+=usr/include/clang/3.4/rdseedintrin.h
-OLD_FILES+=usr/include/clang/3.4/rtmintrin.h
-OLD_FILES+=usr/include/clang/3.4/shaintrin.h
-OLD_FILES+=usr/include/clang/3.4/smmintrin.h
-OLD_FILES+=usr/include/clang/3.4/tbmintrin.h
-OLD_FILES+=usr/include/clang/3.4/tmmintrin.h
-OLD_FILES+=usr/include/clang/3.4/wmmintrin.h
-OLD_FILES+=usr/include/clang/3.4/x86intrin.h
-OLD_FILES+=usr/include/clang/3.4/xmmintrin.h
-OLD_FILES+=usr/include/clang/3.4/xopintrin.h
-OLD_DIRS+=usr/include/clang/3.4
+OLD_FILES+=usr/include/clang/3.4.1/__wmmintrin_aes.h
+OLD_FILES+=usr/include/clang/3.4.1/__wmmintrin_pclmul.h
+OLD_FILES+=usr/include/clang/3.4.1/altivec.h
+OLD_FILES+=usr/include/clang/3.4.1/ammintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/arm_neon.h
+OLD_FILES+=usr/include/clang/3.4.1/avx2intrin.h
+OLD_FILES+=usr/include/clang/3.4.1/avxintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/bmi2intrin.h
+OLD_FILES+=usr/include/clang/3.4.1/bmiintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/cpuid.h
+OLD_FILES+=usr/include/clang/3.4.1/emmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/f16cintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/fma4intrin.h
+OLD_FILES+=usr/include/clang/3.4.1/fmaintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/immintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/lzcntintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/mm3dnow.h
+OLD_FILES+=usr/include/clang/3.4.1/mm_malloc.h
+OLD_FILES+=usr/include/clang/3.4.1/mmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/module.map
+OLD_FILES+=usr/include/clang/3.4.1/nmmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/pmmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/popcntintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/prfchwintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/rdseedintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/rtmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/shaintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/smmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/tbmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/tmmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/wmmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/x86intrin.h
+OLD_FILES+=usr/include/clang/3.4.1/xmmintrin.h
+OLD_FILES+=usr/include/clang/3.4.1/xopintrin.h
+OLD_DIRS+=usr/include/clang/3.4.1
 OLD_DIRS+=usr/include/clang
 OLD_FILES+=usr/share/doc/llvm/clang/LICENSE.TXT
 OLD_DIRS+=usr/share/doc/llvm/clang