Merge llvm release_70 branch r338892, and resolve conflicts.

2018-08-04 13:25:25 +00:00 · 2018-08-04 13:25:25 +00:00 · 40ab48c224
commit 40ab48c224
parent bbd7a9298f 5c03f3e190
7 changed files with 163 additions and 37 deletions
--- a/contrib/llvm/include/llvm/Support/DebugCounter.h
+++ b/contrib/llvm/include/llvm/Support/DebugCounter.h
@ -70,10 +70,9 @@ public:
    return instance().addCounter(Name, Desc);
  }
  inline static bool shouldExecute(unsigned CounterName) {
-// Compile to nothing when debugging is off
-#ifdef NDEBUG
-    return true;
-#else
+    if (!isCountingEnabled())
+      return true;
+
    auto &Us = instance();
    auto Result = Us.Counters.find(CounterName);
    if (Result != Us.Counters.end()) {
@ -93,7 +92,6 @@ public:
    }
    // Didn't find the counter, should we warn?
    return true;
-#endif // NDEBUG
  }

  // Return true if a given counter had values set (either programatically or on
@ -142,7 +140,23 @@ public:
  }
  CounterVector::const_iterator end() const { return RegisteredCounters.end(); }

+  // Force-enables counting all DebugCounters.
+  //
+  // Since DebugCounters are incompatible with threading (not only do they not
+  // make sense, but we'll also see data races), this should only be used in
+  // contexts where we're certain we won't spawn threads.
+  static void enableAllCounters() { instance().Enabled = true; }
+
 private:
+  static bool isCountingEnabled() {
+// Compile to nothing when debugging is off
+#ifdef NDEBUG
+    return false;
+#else
+    return instance().Enabled;
+#endif
+  }
+
  unsigned addCounter(const std::string &Name, const std::string &Desc) {
    unsigned Result = RegisteredCounters.insert(Name);
    Counters[Result] = {};
@ -159,6 +173,10 @@ private:
  };
  DenseMap<unsigned, CounterInfo> Counters;
  CounterVector RegisteredCounters;
+
+  // Whether we should do DebugCounting at all. DebugCounters aren't
+  // thread-safe, so this should always be false in multithreaded scenarios.
+  bool Enabled = false;
 };

 #define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)                              \
--- a/contrib/llvm/lib/Support/DebugCounter.cpp
+++ b/contrib/llvm/lib/Support/DebugCounter.cpp
@ -82,6 +82,7 @@ void DebugCounter::push_back(const std::string &Val) {
             << " is not a registered counter\n";
      return;
    }
+    enableAllCounters();
    Counters[CounterID].Skip = CounterVal;
    Counters[CounterID].IsSet = true;
  } else if (CounterPair.first.endswith("-count")) {
@ -92,6 +93,7 @@ void DebugCounter::push_back(const std::string &Val) {
             << " is not a registered counter\n";
      return;
    }
+    enableAllCounters();
    Counters[CounterID].StopAfter = CounterVal;
    Counters[CounterID].IsSet = true;
  } else {
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@ -4639,7 +4639,9 @@ class BaseFPCondComparison<bit signalAllNans, RegisterClass regtype,

 multiclass FPCondComparison<bit signalAllNans, string mnemonic,
                            SDPatternOperator OpNode = null_frag> {
-  def Hrr : BaseFPCondComparison<signalAllNans, FPR16, mnemonic, []> {
+  def Hrr : BaseFPCondComparison<signalAllNans, FPR16, mnemonic,
+      [(set NZCV, (OpNode (f16 FPR16:$Rn), (f16 FPR16:$Rm), (i32 imm:$nzcv),
+                          (i32 imm:$cond), NZCV))]> {
    let Inst{23-22} = 0b11;
    let Predicates = [HasFullFP16];
  }
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@ -11761,6 +11761,14 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
      ShiftCst);
 }

+// Is this an extending load from an f32 to an f64?
+static bool isFPExtLoad(SDValue Op) {
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
+    return LD->getExtensionType() == ISD::EXTLOAD &&
+      Op.getValueType() == MVT::f64;
+  return false;
+}
+
 /// Reduces the number of fp-to-int conversion when building a vector.
 ///
 /// If this vector is built out of floating to integer conversions,
@ -11795,11 +11803,18 @@ combineElementTruncationToVectorTruncation(SDNode *N,
    SmallVector<SDValue, 4> Ops;
    EVT TargetVT = N->getValueType(0);
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
-      if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
+      SDValue NextOp = N->getOperand(i);
+      if (NextOp.getOpcode() != PPCISD::MFVSR)
        return SDValue();
-      unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
+      unsigned NextConversion = NextOp.getOperand(0).getOpcode();
      if (NextConversion != FirstConversion)
        return SDValue();
+      // If we are converting to 32-bit integers, we need to add an FP_ROUND.
+      // This is not valid if the input was originally double precision. It is
+      // also not profitable to do unless this is an extending load in which
+      // case doing this combine will allow us to combine consecutive loads.
+      if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
+        return SDValue();
      if (N->getOperand(i) != FirstInput)
        IsSplat = false;
    }
@ -11813,8 +11828,9 @@ combineElementTruncationToVectorTruncation(SDNode *N,
    // Now that we know we have the right type of node, get its operands
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue In = N->getOperand(i).getOperand(0);
-      // For 32-bit values, we need to add an FP_ROUND node.
      if (Is32Bit) {
+        // For 32-bit values, we need to add an FP_ROUND node (if we made it
+        // here, we know that all inputs are extending loads so this is safe).
        if (In.isUndef())
          Ops.push_back(DAG.getUNDEF(SrcVT));
        else {
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@ -3494,6 +3494,17 @@ def DblToFlt {
  dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
 }

+def ExtDbl {
+  dag A0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 0))))));
+  dag A1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 1))))));
+  dag B0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 0))))));
+  dag B1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 1))))));
+  dag A0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 0))))));
+  dag A1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 1))))));
+  dag B0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 0))))));
+  dag B1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 1))))));
+}
+
 def ByteToWord {
  dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
  dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
@ -3571,9 +3582,15 @@ def FltToULong {
 }
 def DblToInt {
  dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A))));
+  dag B = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$B))));
+  dag C = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$C))));
+  dag D = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$D))));
 }
 def DblToUInt {
  dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A))));
+  dag B = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$B))));
+  dag C = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$C))));
+  dag D = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$D))));
 }
 def DblToLong {
  dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A))));
@ -3612,6 +3629,47 @@ def MrgFP {
  dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3));
 }

+// Word-element merge dags - conversions from f64 to i32 merged into vectors.
+def MrgWords {
+  // For big endian, we merge low and hi doublewords (A, B).
+  dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0));
+  dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3));
+  dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1));
+  dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0));
+  dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1));
+  dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0));
+
+  // For little endian, we merge low and hi doublewords (B, A).
+  dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0));
+  dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3));
+  dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1));
+  dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0));
+  dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1));
+  dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0));
+
+  // For big endian, we merge hi doublewords of (A, C) and (B, D), convert
+  // then merge.
+  dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC),
+                            (COPY_TO_REGCLASS f64:$C, VSRC), 0));
+  dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC),
+                            (COPY_TO_REGCLASS f64:$D, VSRC), 0));
+  dag CVACS = (v4i32 (XVCVDPSXWS AC));
+  dag CVBDS = (v4i32 (XVCVDPSXWS BD));
+  dag CVACU = (v4i32 (XVCVDPUXWS AC));
+  dag CVBDU = (v4i32 (XVCVDPUXWS BD));
+
+  // For little endian, we merge hi doublewords of (D, B) and (C, A), convert
+  // then merge.
+  dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC),
+                            (COPY_TO_REGCLASS f64:$B, VSRC), 0));
+  dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC),
+                            (COPY_TO_REGCLASS f64:$A, VSRC), 0));
+  dag CVDBS = (v4i32 (XVCVDPSXWS DB));
+  dag CVCAS = (v4i32 (XVCVDPSXWS CA));
+  dag CVDBU = (v4i32 (XVCVDPUXWS DB));
+  dag CVCAU = (v4i32 (XVCVDPUXWS CA));
+}
+
 // Patterns for BUILD_VECTOR nodes.
 let AddedComplexity = 400 in {

@ -3679,6 +3737,20 @@ let AddedComplexity = 400 in {
    def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
                                   DblToFlt.B0, DblToFlt.B1)),
              (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;
+
+    // Convert 4 doubles to a vector of ints.
+    def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
+                                   DblToInt.C, DblToInt.D)),
+              (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>;
+    def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
+                                   DblToUInt.C, DblToUInt.D)),
+              (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>;
+    def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
+                                   ExtDbl.B0S, ExtDbl.B1S)),
+              (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>;
+    def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
+                                   ExtDbl.B0U, ExtDbl.B1U)),
+              (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
  }

  let Predicates = [IsLittleEndian, HasVSX] in {
@ -3693,6 +3765,20 @@ let AddedComplexity = 400 in {
    def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
                                   DblToFlt.B0, DblToFlt.B1)),
              (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;
+
+    // Convert 4 doubles to a vector of ints.
+    def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
+                                   DblToInt.C, DblToInt.D)),
+              (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>;
+    def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
+                                   DblToUInt.C, DblToUInt.D)),
+              (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>;
+    def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
+                                   ExtDbl.B0S, ExtDbl.B1S)),
+              (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>;
+    def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
+                                   ExtDbl.B0U, ExtDbl.B1U)),
+              (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
  }

  let Predicates = [HasDirectMove] in {
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@ -738,6 +738,10 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
    if (GV->isThreadLocal())
      return false;

+    // Can't handle !absolute_symbol references yet.
+    if (GV->isAbsoluteSymbolRef())
+      return false;
+
    // RIP-relative addresses can't have additional register operands, so if
    // we've already folded stuff into the addressing mode, just force the
    // global value into its own register, which we can use as the basereg.
--- a/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
@ -63,46 +63,44 @@ OPTIONS:
 )";

 const char ArHelp[] = R"(
-OVERVIEW: LLVM Archiver (llvm-ar)
+OVERVIEW: LLVM Archiver

-  This program archives bitcode files into single libraries
-
-USAGE: llvm-ar [options] [relpos] [count] <archive-file> [members]...
+USAGE: llvm-ar [options] [-]<operation>[modifiers] [relpos] <archive> [files]
+       llvm-ar -M [<mri-script]

 OPTIONS:
-  -M                                -
-  -format                           - Archive format to create
-    =default                        -   default
-    =gnu                            -   gnu
-    =darwin                         -   darwin
-    =bsd                            -   bsd
-  -plugin=<string>                  - plugin (ignored for compatibility
-  -help                             - Display available options
-  -version                          - Display the version of this program
+  --format              - Archive format to create
+    =default            -   default
+    =gnu                -   gnu
+    =darwin             -   darwin
+    =bsd                -   bsd
+  --plugin=<string>     - Ignored for compatibility
+  --help                - Display available options
+  --version             - Display the version of this program

 OPERATIONS:
-  d[NsS]       - delete file(s) from the archive
-  m[abiSs]     - move file(s) in the archive
-  p[kN]        - print file(s) found in the archive
-  q[ufsS]      - quick append file(s) to the archive
-  r[abfiuRsS]  - replace or insert file(s) into the archive
-  t            - display contents of archive
-  x[No]        - extract file(s) from the archive
+  d - delete [files] from the archive
+  m - move [files] in the archive
+  p - print [files] found in the archive
+  q - quick append [files] to the archive
+  r - replace or insert [files] into the archive
+  s - act as ranlib
+  t - display contents of archive
+  x - extract [files] from the archive

-MODIFIERS (operation specific):
-  [a] - put file(s) after [relpos]
-  [b] - put file(s) before [relpos] (same as [i])
+MODIFIERS:
+  [a] - put [files] after [relpos]
+  [b] - put [files] before [relpos] (same as [i])
+  [c] - do not warn if archive had to be created
  [D] - use zero for timestamps and uids/gids (default)
-  [i] - put file(s) before [relpos] (same as [b])
+  [i] - put [files] before [relpos] (same as [b])
+  [l] - ignored for compatibility
  [o] - preserve original dates
  [s] - create an archive index (cf. ranlib)
  [S] - do not build a symbol table
  [T] - create a thin archive
-  [u] - update only files newer than archive contents
+  [u] - update only [files] newer than archive contents
  [U] - use actual timestamps and uids/gids
-
-MODIFIERS (generic):
-  [c] - do not warn if the library had to be created
  [v] - be verbose about actions taken
 )";