mlx5 M_TSTMP accuracy loses quite a bit of precision, so let's fix it.

The way that the clock is synchronized between the system and the current mlx5 for the purposes of the M_TSTMP being carried loses a lot of precision. Instead, let's change the math that calculates this to separate out the seconds/nanoseconds and operate on the two values so we don't get overflow, instead of just shifting the value down and losing precision. Reviewed by: kib, hselasky Sponsored by: Netflix Inc Differential Revision: https://reviews.freebsd.org/D36327
2022-09-20 13:12:16 -04:00 · 2022-09-20 13:12:16 -04:00 · 7cc3ea9c6f
commit 7cc3ea9c6f
parent 06a400d76b
3 changed files with 52 additions and 15 deletions
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@ -1144,6 +1144,7 @@ struct mlx5e_priv {
 	int	clbr_curr;
 	struct mlx5e_clbr_point clbr_points[2];
 	u_int	clbr_gen;
+	uint64_t cclk;

 	struct mlx5e_dcbx dcbx;
 	bool	sw_is_port_buf_owner;
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@ -4797,6 +4797,8 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
 	    &priv->clbr_done, 0,
 	    "RX timestamps calibration state");
 	callout_init(&priv->tstmp_clbr, 1);
+	/* Pull out the frequency of the clock in hz */
+	priv->cclk = (uint64_t)MLX5_CAP_GEN(mdev, device_frequency_khz) * 1000ULL;
 	mlx5e_reset_calibration_callout(priv);

 	pa.pa_version = PFIL_VERSION;
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
@ -214,7 +214,10 @@ static uint64_t
 mlx5e_mbuf_tstmp(struct mlx5e_priv *priv, uint64_t hw_tstmp)
 {
 	struct mlx5e_clbr_point *cp, dcp;
-	uint64_t a1, a2, res;
+	uint64_t tstmp_sec, tstmp_nsec;
+	uint64_t hw_clocks;
+	uint64_t rt_cur_to_prev, res_s, res_n, res_s_modulo, res;
+	uint64_t hw_clk_div;
 	u_int gen;

 	do {
@ -224,19 +227,49 @@ mlx5e_mbuf_tstmp(struct mlx5e_priv *priv, uint64_t hw_tstmp)
 			return (0);
 		dcp = *cp;
 		atomic_thread_fence_acq();
-	} while (gen != cp->clbr_gen);
-
-	a1 = (hw_tstmp - dcp.clbr_hw_prev) >> MLX5E_TSTMP_PREC;
-	a2 = (dcp.base_curr - dcp.base_prev) >> MLX5E_TSTMP_PREC;
-	res = (a1 * a2) << MLX5E_TSTMP_PREC;
-
+	} while (gen != dcp.clbr_gen);
 	/*
-	 * Divisor cannot be zero because calibration callback
-	 * checks for the condition and disables timestamping
-	 * if clock halted.
+	 * Our goal here is to have a result that is:
+	 *
+	 * (                             (cur_time - prev_time)   )
+	 * ((hw_tstmp - hw_prev) *  ----------------------------- ) + prev_time
+	 * (                             (hw_cur - hw_prev)       )
+	 *
+	 * With the constraints that we cannot use float and we
+	 * don't want to overflow the uint64_t numbers we are using.
+	 *
+	 * The plan is to take the clocking value of the hw timestamps
+	 * and split them into seconds and nanosecond equivalent portions.
+	 * Then we operate on the two portions separately making sure to
+	 * bring back the carry over from the seconds when we divide.
+	 *
+	 * First up lets get the two divided into separate entities
+	 * i.e. the seconds. We use the clock frequency for this.
+	 * Note that priv->cclk was setup with the clock frequency
+	 * in hz so we are all set to go.
 	 */
-	res /= (dcp.clbr_hw_curr - dcp.clbr_hw_prev) >> MLX5E_TSTMP_PREC;
-
+	hw_clocks = hw_tstmp - dcp.clbr_hw_prev;
+	tstmp_sec = hw_clocks / priv->cclk;
+	tstmp_nsec = hw_clocks % priv->cclk;
+	/* Now work with them separately */
+	rt_cur_to_prev = (dcp.base_curr - dcp.base_prev);
+	res_s = tstmp_sec * rt_cur_to_prev;
+	res_n = tstmp_nsec * rt_cur_to_prev;
+	/* Now lets get our divider */
+	hw_clk_div = dcp.clbr_hw_curr - dcp.clbr_hw_prev;
+	/* Make sure to save the remainder from the seconds divide */
+	res_s_modulo = res_s % hw_clk_div;
+	res_s /= hw_clk_div;
+	/* scale the remainder to where it should be */
+	res_s_modulo *= priv->cclk;
+	/* Now add in the remainder */
+	res_n += res_s_modulo;
+	/* Now do the divide */
+	res_n /= hw_clk_div;
+	res_s *= priv->cclk;
+	/* Recombine the two */
+	res = res_s + res_n;
+	/* And now add in the base time to get to the real timestamp */
 	res += dcp.base_prev;
 	return (res);
 }
@ -370,10 +403,11 @@ mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe,
 			tstmp &= ~MLX5_CQE_TSTMP_PTP;
 			mb->m_flags |= M_TSTMP_HPREC;
 		}
-		mb->m_pkthdr.rcv_tstmp = tstmp;
-		mb->m_flags |= M_TSTMP;
+		if (tstmp != 0) {
+			mb->m_pkthdr.rcv_tstmp = tstmp;
+			mb->m_flags |= M_TSTMP;
+		}
 	}
-
 	switch (get_cqe_tls_offload(cqe)) {
 	case CQE_TLS_OFFLOAD_DECRYPTED:
 		/* set proper checksum flag for decrypted packets */