net/idpf: support timestamp offload

Add support for timestamp offload.

Signed-off-by: Wenjing Qiao <wenjing.qiao@intel.com>
Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>
This commit is contained in:
Junfeng Guo 2022-10-31 08:33:46 +00:00 committed by Thomas Monjalon
parent 5bf87b45b2
commit 268d82e5f3
5 changed files with 163 additions and 1 deletion

View File

@ -11,6 +11,7 @@ MTU update = Y
TSO = P
L3 checksum offload = P
L4 checksum offload = P
Timestamp offload = P
Linux = Y
x86-32 = Y
x86-64 = Y

View File

@ -22,6 +22,8 @@ rte_spinlock_t idpf_adapter_lock;
struct idpf_adapter_list idpf_adapter_list;
bool idpf_adapter_list_init;
uint64_t idpf_timestamp_dynflag;
static const char * const idpf_valid_args[] = {
IDPF_TX_SINGLE_Q,
IDPF_RX_SINGLE_Q,
@ -65,7 +67,8 @@ idpf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
RTE_ETH_RX_OFFLOAD_IPV4_CKSUM |
RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM;
RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM |
RTE_ETH_RX_OFFLOAD_TIMESTAMP;
dev_info->tx_offload_capa =
RTE_ETH_TX_OFFLOAD_TCP_TSO |

View File

@ -167,6 +167,9 @@ struct idpf_adapter {
bool tx_vec_allowed;
bool rx_use_avx512;
bool tx_use_avx512;
/* For PTP */
uint64_t time_hw;
};
TAILQ_HEAD(idpf_adapter_list, idpf_adapter);

View File

@ -10,6 +10,8 @@
#include "idpf_rxtx.h"
#include "idpf_rxtx_vec_common.h"
static int idpf_timestamp_dynfield_offset = -1;
static int
check_rx_thresh(uint16_t nb_desc, uint16_t thresh)
{
@ -900,6 +902,24 @@ idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
return idpf_tx_split_queue_setup(dev, queue_idx, nb_desc,
socket_id, tx_conf);
}
/*
 * Register the dynamic mbuf field and flag used to carry Rx timestamps.
 *
 * No-op when the queue has not enabled RTE_ETH_RX_OFFLOAD_TIMESTAMP.
 *
 * @rxq: Rx queue whose offload flags are checked
 *
 * Returns 0 on success (or when the offload is disabled), -EINVAL if the
 * dynamic field/flag registration fails.
 */
static int
idpf_register_ts_mbuf(struct idpf_rx_queue *rxq)
{
	int ret;

	if ((rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) == 0)
		return 0;

	/* Register mbuf field and flag for Rx timestamp */
	ret = rte_mbuf_dyn_rx_timestamp_register(&idpf_timestamp_dynfield_offset,
						 &idpf_timestamp_dynflag);
	if (ret != 0) {
		PMD_DRV_LOG(ERR,
			    "Cannot register mbuf field/flag for timestamp");
		return -EINVAL;
	}

	return 0;
}
static int
idpf_alloc_single_rxq_mbufs(struct idpf_rx_queue *rxq)
{
@ -993,6 +1013,13 @@ idpf_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id)
return -EINVAL;
}
err = idpf_register_ts_mbuf(rxq);
if (err != 0) {
PMD_DRV_LOG(ERR, "fail to regidter timestamp mbuf %u",
rx_queue_id);
return -EIO;
}
if (rxq->bufq1 == NULL) {
/* Single queue */
err = idpf_alloc_single_rxq_mbufs(rxq);
@ -1354,6 +1381,7 @@ idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
struct idpf_rx_queue *rxq;
const uint32_t *ptype_tbl;
uint8_t status_err0_qw1;
struct idpf_adapter *ad;
struct rte_mbuf *rxm;
uint16_t rx_id_bufq1;
uint16_t rx_id_bufq2;
@ -1363,9 +1391,11 @@ idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t gen_id;
uint16_t rx_id;
uint16_t nb_rx;
uint64_t ts_ns;
nb_rx = 0;
rxq = rx_queue;
ad = rxq->adapter;
if (unlikely(rxq == NULL) || unlikely(!rxq->q_started))
return nb_rx;
@ -1376,6 +1406,9 @@ idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rx_desc_ring = rxq->rx_ring;
ptype_tbl = rxq->adapter->ptype_tbl;
if ((rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) != 0)
rxq->hw_register_set = 1;
while (nb_rx < nb_pkts) {
rx_desc = &rx_desc_ring[rx_id];
@ -1431,6 +1464,18 @@ idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
status_err0_qw1 = rx_desc->status_err0_qw1;
pkt_flags = idpf_splitq_rx_csum_offload(status_err0_qw1);
pkt_flags |= idpf_splitq_rx_rss_offload(rxm, rx_desc);
if (idpf_timestamp_dynflag > 0 &&
(rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)) {
/* timestamp */
ts_ns = idpf_tstamp_convert_32b_64b(ad,
rxq->hw_register_set,
rte_le_to_cpu_32(rx_desc->ts_high));
rxq->hw_register_set = 0;
*RTE_MBUF_DYNFIELD(rxm,
idpf_timestamp_dynfield_offset,
rte_mbuf_timestamp_t *) = ts_ns;
rxm->ol_flags |= idpf_timestamp_dynflag;
}
rxm->ol_flags |= pkt_flags;
@ -1732,18 +1777,22 @@ idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
const uint32_t *ptype_tbl;
uint16_t rx_id, nb_hold;
struct rte_eth_dev *dev;
struct idpf_adapter *ad;
uint16_t rx_packet_len;
struct rte_mbuf *rxm;
struct rte_mbuf *nmb;
uint16_t rx_status0;
uint64_t pkt_flags;
uint64_t dma_addr;
uint64_t ts_ns;
uint16_t nb_rx;
nb_rx = 0;
nb_hold = 0;
rxq = rx_queue;
ad = rxq->adapter;
if (unlikely(rxq == NULL) || unlikely(!rxq->q_started))
return nb_rx;
@ -1751,6 +1800,9 @@ idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rx_ring = rxq->rx_ring;
ptype_tbl = rxq->adapter->ptype_tbl;
if ((rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) != 0)
rxq->hw_register_set = 1;
while (nb_rx < nb_pkts) {
rxdp = &rx_ring[rx_id];
rx_status0 = rte_le_to_cpu_16(rxdp->flex_nic_wb.status_error0);
@ -1810,6 +1862,19 @@ idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rxm->ol_flags |= pkt_flags;
if (idpf_timestamp_dynflag > 0 &&
(rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) != 0) {
/* timestamp */
ts_ns = idpf_tstamp_convert_32b_64b(ad,
rxq->hw_register_set,
rte_le_to_cpu_32(rxd.flex_nic_wb.flex_ts.ts_high));
rxq->hw_register_set = 0;
*RTE_MBUF_DYNFIELD(rxm,
idpf_timestamp_dynfield_offset,
rte_mbuf_timestamp_t *) = ts_ns;
rxm->ol_flags |= idpf_timestamp_dynflag;
}
rx_pkts[nb_rx++] = rxm;
}
rxq->rx_tail = rx_id;

View File

@ -7,6 +7,41 @@
#include "idpf_ethdev.h"
/* MTS */
/*
 * Main-timer (PTP) shadow-time register offsets, relative to
 * PF_TIMESYNC_BASE (defined elsewhere in the driver).
 * Groups 0..3 each expose a command/enable word plus low/high
 * shadow-time halves at 0x20-byte strides.
 */
#define GLTSYN_CMD_SYNC_0_0 (PF_TIMESYNC_BASE + 0x0)
#define PF_GLTSYN_SHTIME_0_0 (PF_TIMESYNC_BASE + 0x4)
#define PF_GLTSYN_SHTIME_L_0 (PF_TIMESYNC_BASE + 0x8)
#define PF_GLTSYN_SHTIME_H_0 (PF_TIMESYNC_BASE + 0xC)
#define GLTSYN_ART_L_0 (PF_TIMESYNC_BASE + 0x10)
#define GLTSYN_ART_H_0 (PF_TIMESYNC_BASE + 0x14)
#define PF_GLTSYN_SHTIME_0_1 (PF_TIMESYNC_BASE + 0x24)
#define PF_GLTSYN_SHTIME_L_1 (PF_TIMESYNC_BASE + 0x28)
#define PF_GLTSYN_SHTIME_H_1 (PF_TIMESYNC_BASE + 0x2C)
#define PF_GLTSYN_SHTIME_0_2 (PF_TIMESYNC_BASE + 0x44)
#define PF_GLTSYN_SHTIME_L_2 (PF_TIMESYNC_BASE + 0x48)
#define PF_GLTSYN_SHTIME_H_2 (PF_TIMESYNC_BASE + 0x4C)
#define PF_GLTSYN_SHTIME_0_3 (PF_TIMESYNC_BASE + 0x64)
#define PF_GLTSYN_SHTIME_L_3 (PF_TIMESYNC_BASE + 0x68)
#define PF_GLTSYN_SHTIME_H_3 (PF_TIMESYNC_BASE + 0x6C)
/*
 * Timesync registers mapped through BAR4: enable/command words,
 * current-time and shadow-time halves, increment value and shadow
 * adjustment registers. Offsets are absolute within the BAR.
 */
#define PF_TIMESYNC_BAR4_BASE 0x0E400000
#define GLTSYN_ENA (PF_TIMESYNC_BAR4_BASE + 0x90)
#define GLTSYN_CMD (PF_TIMESYNC_BAR4_BASE + 0x94)
#define GLTSYC_TIME_L (PF_TIMESYNC_BAR4_BASE + 0x104)
#define GLTSYC_TIME_H (PF_TIMESYNC_BAR4_BASE + 0x108)
#define GLTSYN_CMD_SYNC_0_4 (PF_TIMESYNC_BAR4_BASE + 0x110)
#define PF_GLTSYN_SHTIME_L_4 (PF_TIMESYNC_BAR4_BASE + 0x118)
#define PF_GLTSYN_SHTIME_H_4 (PF_TIMESYNC_BAR4_BASE + 0x11C)
#define GLTSYN_INCVAL_L (PF_TIMESYNC_BAR4_BASE + 0x150)
#define GLTSYN_INCVAL_H (PF_TIMESYNC_BAR4_BASE + 0x154)
#define GLTSYN_SHADJ_L (PF_TIMESYNC_BAR4_BASE + 0x158)
#define GLTSYN_SHADJ_H (PF_TIMESYNC_BAR4_BASE + 0x15C)
#define GLTSYN_CMD_SYNC_0_5 (PF_TIMESYNC_BAR4_BASE + 0x130)
#define PF_GLTSYN_SHTIME_L_5 (PF_TIMESYNC_BAR4_BASE + 0x138)
#define PF_GLTSYN_SHTIME_H_5 (PF_TIMESYNC_BAR4_BASE + 0x13C)
/* In QLEN must be whole number of 32 descriptors. */
#define IDPF_ALIGN_RING_DESC 32
#define IDPF_MIN_RING_DESC 32
@ -43,6 +78,8 @@
(sizeof(struct virtchnl2_ptype) + \
(((p)->proto_id_count ? ((p)->proto_id_count - 1) : 0) * sizeof((p)->proto_id[0])))
extern uint64_t idpf_timestamp_dynflag;
struct idpf_rx_queue {
struct idpf_adapter *adapter; /* the adapter this queue belongs to */
struct rte_mempool *mp; /* mbuf pool to populate Rx ring */
@ -198,4 +235,57 @@ void idpf_stop_queues(struct rte_eth_dev *dev);
void idpf_set_rx_function(struct rte_eth_dev *dev);
void idpf_set_tx_function(struct rte_eth_dev *dev);
#define IDPF_TIMESYNC_REG_WRAP_GUARD_BAND 10000
/*
 * Helper function to convert a 32b nanoseconds timestamp to 64b.
 *
 * @ad: adapter holding the cached 64-bit main-timer value (ad->time_hw)
 * @flag: when non-zero, re-read the main timer from the GLTSYN shadow-time
 *        registers and refresh ad->time_hw before extending the timestamp
 * @in_timestamp: 32-bit nanosecond timestamp taken from the Rx descriptor
 *
 * Returns the extended 64-bit nanosecond timestamp. On non-x86_64 builds
 * this is a stub that always returns 0.
 */
static inline uint64_t
idpf_tstamp_convert_32b_64b(struct idpf_adapter *ad, uint32_t flag,
uint32_t in_timestamp)
{
#ifdef RTE_ARCH_X86_64
struct idpf_hw *hw = &ad->hw;
const uint64_t mask = 0xFFFFFFFF;
uint32_t hi, lo, lo2, delta;
uint64_t ns;
if (flag != 0) {
/*
 * Kick the shadow-time capture (SHTIME_EN, then EXEC_CMD |
 * SHTIME_EN), then read the latched low/high timer halves.
 */
IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M |
PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
/*
 * On typical system, the delta between lo and lo2 is ~1000ns,
 * so 10000 seems a large-enough but not overly-big guard band.
 * Re-read lo only when it is close enough to UINT32_MAX that it
 * may have wrapped between the two register reads.
 */
if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND))
lo2 = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
else
lo2 = lo;
/* lo wrapped between reads: re-read both halves coherently. */
if (lo2 < lo) {
lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
}
ad->time_hw = ((uint64_t)hi << 32) | lo;
}
/*
 * Extend the 32-bit descriptor timestamp against the cached timer:
 * a difference larger than half the 32-bit range is treated as a
 * wrap in the opposite direction (unsigned arithmetic handles the
 * modular subtraction).
 */
delta = (in_timestamp - (uint32_t)(ad->time_hw & mask));
if (delta > (mask / 2)) {
delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp);
ns = ad->time_hw - delta;
} else {
ns = ad->time_hw + delta;
}
return ns;
#else /* !RTE_ARCH_X86_64 */
RTE_SET_USED(ad);
RTE_SET_USED(flag);
RTE_SET_USED(in_timestamp);
return 0;
#endif /* RTE_ARCH_X86_64 */
}
#endif /* _IDPF_RXTX_H_ */