numam-dpdk/drivers/net/gve/gve_tx.c
Junfeng Guo 10d9e91a76 net/gve: support queue release
Add support for queue operations:
 - rx_queue_release
 - tx_queue_release

The previous gve_tx_queue_release and gve_rx_queue_release functions were
only used internally to release Rx/Tx queue related resources.

But when the queues or ports need to be re-configured, the dev ops
tx_queue_release and rx_queue_release are checked and then called.

Without these two dev ops, the Rx/Tx queue structs would just be set to
NULL directly.

Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>
Reviewed-by: Ferruh Yigit <ferruh.yigit@amd.com>
2022-11-11 15:48:09 +01:00
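
For context, a minimal sketch of how such release callbacks are typically wired into a driver's eth_dev_ops table so the ethdev layer can find and call them when queues or ports are re-configured. The struct/variable name and the Rx-side entry below are assumptions for illustration; the actual wiring for this driver lives in gve_ethdev.c, not in this file.

/* Hypothetical dev-ops wiring (illustrative only; names assumed). */
static const struct eth_dev_ops gve_eth_dev_ops = {
	.tx_queue_setup   = gve_tx_queue_setup,
	.tx_queue_release = gve_tx_queue_release,
	.rx_queue_release = gve_rx_queue_release,
	/* ... other ops ... */
};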

/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2022 Intel Corporation
 */

#include "gve_ethdev.h"
#include "base/gve_adminq.h"
static inline void
gve_free_bulk_mbuf(struct rte_mbuf **txep, int num)
{
	struct rte_mbuf *m, *free[GVE_TX_MAX_FREE_SZ];
	int nb_free = 0;
	int i, s;

	if (unlikely(num == 0))
		return;

	/* Find the 1st mbuf which needs to be freed */
	for (s = 0; s < num; s++) {
		if (txep[s] != NULL) {
			m = rte_pktmbuf_prefree_seg(txep[s]);
			if (m != NULL)
				break;
		}
	}

	if (s == num)
		return;

	free[0] = m;
	nb_free = 1;
	for (i = s + 1; i < num; i++) {
		if (likely(txep[i] != NULL)) {
			m = rte_pktmbuf_prefree_seg(txep[i]);
			if (likely(m != NULL)) {
				if (likely(m->pool == free[0]->pool)) {
					free[nb_free++] = m;
				} else {
					rte_mempool_put_bulk(free[0]->pool, (void *)free, nb_free);
					free[0] = m;
					nb_free = 1;
				}
			}
			txep[i] = NULL;
		}
	}
	rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
}
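
/* Reclaim descriptors completed by the device, based on the device-updated
 * head counter: return FIFO space for the QPL format, or free mbufs for
 * raw addressing.
 */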
static inline void
gve_tx_clean(struct gve_tx_queue *txq)
{
	uint16_t mask = txq->nb_tx_desc - 1;
	uint32_t start = txq->next_to_clean & mask;
	uint32_t ntc, nb_clean, i;
	struct gve_tx_iovec *iov;

	ntc = rte_be_to_cpu_32(rte_read32(txq->qtx_head));
	ntc = ntc & mask;

	if (ntc == start)
		return;

	/* if wrap around, free twice. */
	if (ntc < start) {
		nb_clean = txq->nb_tx_desc - start;
		if (nb_clean > GVE_TX_MAX_FREE_SZ)
			nb_clean = GVE_TX_MAX_FREE_SZ;
		if (txq->is_gqi_qpl) {
			for (i = start; i < start + nb_clean; i++) {
				iov = &txq->iov_ring[i];
				txq->fifo_avail += iov->iov_len;
				iov->iov_base = 0;
				iov->iov_len = 0;
			}
		} else {
			gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);
		}
		txq->nb_free += nb_clean;
		start += nb_clean;
		if (start == txq->nb_tx_desc)
			start = 0;
		txq->next_to_clean += nb_clean;
	}

	if (ntc > start) {
		nb_clean = ntc - start;
		if (nb_clean > GVE_TX_MAX_FREE_SZ)
			nb_clean = GVE_TX_MAX_FREE_SZ;
		if (txq->is_gqi_qpl) {
			for (i = start; i < start + nb_clean; i++) {
				iov = &txq->iov_ring[i];
				txq->fifo_avail += iov->iov_len;
				iov->iov_base = 0;
				iov->iov_len = 0;
			}
		} else {
			gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);
		}
		txq->nb_free += nb_clean;
		txq->next_to_clean += nb_clean;
	}
}
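
/* Clean the software ring for the QPL path: an mbuf can be freed as soon as
 * its data has been copied into the TX FIFO, so sw_ntc is advanced towards
 * sw_tail independently of device completions.
 */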
static inline void
gve_tx_clean_swr_qpl(struct gve_tx_queue *txq)
{
	uint32_t start = txq->sw_ntc;
	uint32_t ntc, nb_clean;

	ntc = txq->sw_tail;

	if (ntc == start)
		return;

	/* if wrap around, free twice. */
	if (ntc < start) {
		nb_clean = txq->nb_tx_desc - start;
		if (nb_clean > GVE_TX_MAX_FREE_SZ)
			nb_clean = GVE_TX_MAX_FREE_SZ;
		gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);

		txq->sw_nb_free += nb_clean;
		start += nb_clean;
		if (start == txq->nb_tx_desc)
			start = 0;
		txq->sw_ntc = start;
	}

	if (ntc > start) {
		nb_clean = ntc - start;
		if (nb_clean > GVE_TX_MAX_FREE_SZ)
			nb_clean = GVE_TX_MAX_FREE_SZ;
		gve_free_bulk_mbuf(&txq->sw_ring[start], nb_clean);

		txq->sw_nb_free += nb_clean;
		start += nb_clean;
		txq->sw_ntc = start;
	}
}
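
/* Fill the first (packet) descriptor for an mbuf, including the L4 checksum
 * and TSO offload fields.
 */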
static inline void
gve_tx_fill_pkt_desc(volatile union gve_tx_desc *desc, struct rte_mbuf *mbuf,
		     uint8_t desc_cnt, uint16_t len, uint64_t addr)
{
	uint64_t csum_l4 = mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK;
	uint8_t l4_csum_offset = 0;
	uint8_t l4_hdr_offset = 0;

	if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
		csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM;

	switch (csum_l4) {
	case RTE_MBUF_F_TX_TCP_CKSUM:
		l4_csum_offset = offsetof(struct rte_tcp_hdr, cksum);
		l4_hdr_offset = mbuf->l2_len + mbuf->l3_len;
		break;
	case RTE_MBUF_F_TX_UDP_CKSUM:
		l4_csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum);
		l4_hdr_offset = mbuf->l2_len + mbuf->l3_len;
		break;
	case RTE_MBUF_F_TX_SCTP_CKSUM:
		l4_csum_offset = offsetof(struct rte_sctp_hdr, cksum);
		l4_hdr_offset = mbuf->l2_len + mbuf->l3_len;
		break;
	}

	if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
		desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
		desc->pkt.l4_csum_offset = l4_csum_offset >> 1;
		desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else if (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
		desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
		desc->pkt.l4_csum_offset = l4_csum_offset >> 1;
		desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else {
		desc->pkt.type_flags = GVE_TXD_STD;
		desc->pkt.l4_csum_offset = 0;
		desc->pkt.l4_hdr_offset = 0;
	}
	desc->pkt.desc_cnt = desc_cnt;
	desc->pkt.len = rte_cpu_to_be_16(mbuf->pkt_len);
	desc->pkt.seg_len = rte_cpu_to_be_16(len);
	desc->pkt.seg_addr = rte_cpu_to_be_64(addr);
}
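
/* Fill a segment descriptor; for TSO it also carries the MSS and L3 offset. */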
static inline void
gve_tx_fill_seg_desc(volatile union gve_tx_desc *desc, uint64_t ol_flags,
		     union gve_tx_offload tx_offload,
		     uint16_t len, uint64_t addr)
{
	desc->seg.type_flags = GVE_TXD_SEG;
	if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
		if (ol_flags & RTE_MBUF_F_TX_IPV6)
			desc->seg.type_flags |= GVE_TXSF_IPV6;
		desc->seg.l3_offset = tx_offload.l2_len >> 1;
		desc->seg.mss = rte_cpu_to_be_16(tx_offload.tso_segsz);
	}
	desc->seg.seg_len = rte_cpu_to_be_16(len);
	desc->seg.seg_addr = rte_cpu_to_be_64(addr);
}
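
/* Check whether the TX FIFO can hold 'len' bytes without splitting the
 * segment across the wrap-around.
 */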
static inline bool
is_fifo_avail(struct gve_tx_queue *txq, uint16_t len)
{
	if (txq->fifo_avail < len)
		return false;
	/* Don't split segment. */
	if (txq->fifo_head + len > txq->fifo_size &&
	    txq->fifo_size - txq->fifo_head + len > txq->fifo_avail)
		return false;
	return true;
}
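
/* Reserve 'len' bytes in the TX FIFO for descriptor 'tx_id', record the
 * reservation in the iov ring and return the FIFO offset; the next head is
 * re-aligned to a cache line.
 */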
static inline uint64_t
gve_tx_alloc_from_fifo(struct gve_tx_queue *txq, uint16_t tx_id, uint16_t len)
{
	uint32_t head = txq->fifo_head;
	uint32_t size = txq->fifo_size;
	struct gve_tx_iovec *iov;
	uint32_t aligned_head;
	uint32_t iov_len = 0;
	uint64_t fifo_addr;

	iov = &txq->iov_ring[tx_id];

	/* Don't split segment */
	if (head + len > size) {
		iov_len += (size - head);
		head = 0;
	}

	fifo_addr = head;
	iov_len += len;
	iov->iov_base = head;

	/* Re-align to a cacheline for next head */
	head += len;
	aligned_head = RTE_ALIGN(head, RTE_CACHE_LINE_SIZE);
	iov_len += (aligned_head - head);
	iov->iov_len = iov_len;

	if (aligned_head == txq->fifo_size)
		aligned_head = 0;
	txq->fifo_head = aligned_head;
	txq->fifo_avail -= iov_len;

	return fifo_addr;
}
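
/* TX burst for the GQI_QPL queue format: packet data is copied into the
 * pre-registered queue-page-list FIFO (bounce buffers), so descriptors
 * reference FIFO offsets rather than mbuf addresses.
 */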
static inline uint16_t
gve_tx_burst_qpl(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	union gve_tx_offload tx_offload = {0};
	volatile union gve_tx_desc *txr, *txd;
	struct gve_tx_queue *txq = tx_queue;
	struct rte_mbuf **sw_ring = txq->sw_ring;
	uint16_t mask = txq->nb_tx_desc - 1;
	uint16_t tx_id = txq->tx_tail & mask;
	uint64_t ol_flags, addr, fifo_addr;
	uint32_t tx_tail = txq->tx_tail;
	struct rte_mbuf *tx_pkt, *first;
	uint16_t sw_id = txq->sw_tail;
	uint16_t nb_used, i;
	uint16_t nb_tx = 0;
	uint32_t hlen;

	txr = txq->tx_desc_ring;

	if (txq->nb_free < txq->free_thresh || txq->fifo_avail == 0)
		gve_tx_clean(txq);

	if (txq->sw_nb_free < txq->free_thresh)
		gve_tx_clean_swr_qpl(txq);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		tx_pkt = *tx_pkts++;
		ol_flags = tx_pkt->ol_flags;

		if (txq->sw_nb_free < tx_pkt->nb_segs) {
			gve_tx_clean_swr_qpl(txq);
			if (txq->sw_nb_free < tx_pkt->nb_segs)
				goto end_of_tx;
		}

		/* Even for multi-segs, use 1 qpl buf for data */
		nb_used = 1;
		if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
			nb_used++;

		if (txq->nb_free < nb_used)
			goto end_of_tx;

		tx_offload.l2_len = tx_pkt->l2_len;
		tx_offload.l3_len = tx_pkt->l3_len;
		tx_offload.l4_len = tx_pkt->l4_len;
		tx_offload.tso_segsz = tx_pkt->tso_segsz;

		first = tx_pkt;
		txd = &txr[tx_id];

		hlen = ol_flags & RTE_MBUF_F_TX_TCP_SEG ?
			(uint32_t)(tx_offload.l2_len + tx_offload.l3_len + tx_offload.l4_len) :
			tx_pkt->pkt_len;

		sw_ring[sw_id] = tx_pkt;
		if (!is_fifo_avail(txq, hlen)) {
			gve_tx_clean(txq);
			if (!is_fifo_avail(txq, hlen))
				goto end_of_tx;
		}
		addr = (uint64_t)(tx_pkt->buf_addr) + tx_pkt->data_off;
		fifo_addr = gve_tx_alloc_from_fifo(txq, tx_id, hlen);

		/* For TSO, check if there's enough fifo space for data first */
		if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
			if (!is_fifo_avail(txq, tx_pkt->pkt_len - hlen)) {
				gve_tx_clean(txq);
				if (!is_fifo_avail(txq, tx_pkt->pkt_len - hlen))
					goto end_of_tx;
			}
		}
		if (tx_pkt->nb_segs == 1 || ol_flags & RTE_MBUF_F_TX_TCP_SEG)
			rte_memcpy((void *)(size_t)(fifo_addr + txq->fifo_base),
				   (void *)(size_t)addr, hlen);
		else
			rte_pktmbuf_read(tx_pkt, 0, hlen,
					 (void *)(size_t)(fifo_addr + txq->fifo_base));
		gve_tx_fill_pkt_desc(txd, tx_pkt, nb_used, hlen, fifo_addr);

		if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
			tx_id = (tx_id + 1) & mask;
			txd = &txr[tx_id];
			addr = (uint64_t)(tx_pkt->buf_addr) + tx_pkt->data_off + hlen;
			fifo_addr = gve_tx_alloc_from_fifo(txq, tx_id, tx_pkt->pkt_len - hlen);
			if (tx_pkt->nb_segs == 1)
				rte_memcpy((void *)(size_t)(fifo_addr + txq->fifo_base),
					   (void *)(size_t)addr,
					   tx_pkt->pkt_len - hlen);
			else
				rte_pktmbuf_read(tx_pkt, hlen, tx_pkt->pkt_len - hlen,
						 (void *)(size_t)(fifo_addr + txq->fifo_base));

			gve_tx_fill_seg_desc(txd, ol_flags, tx_offload,
					     tx_pkt->pkt_len - hlen, fifo_addr);
		}

		/* record mbuf in sw_ring for free */
		for (i = 1; i < first->nb_segs; i++) {
			sw_id = (sw_id + 1) & mask;
			tx_pkt = tx_pkt->next;
			sw_ring[sw_id] = tx_pkt;
		}

		sw_id = (sw_id + 1) & mask;
		tx_id = (tx_id + 1) & mask;

		txq->nb_free -= nb_used;
		txq->sw_nb_free -= first->nb_segs;
		tx_tail += nb_used;
	}

end_of_tx:
	if (nb_tx) {
		rte_write32(rte_cpu_to_be_32(tx_tail), txq->qtx_tail);
		txq->tx_tail = tx_tail;
		txq->sw_tail = sw_id;
	}

	return nb_tx;
}
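
/* TX burst for the raw-addressing queue format: descriptors carry the mbuf
 * data IOVAs directly, one descriptor per segment (plus one for TSO).
 */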
static inline uint16_t
gve_tx_burst_ra(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	union gve_tx_offload tx_offload = {0};
	volatile union gve_tx_desc *txr, *txd;
	struct gve_tx_queue *txq = tx_queue;
	struct rte_mbuf **sw_ring = txq->sw_ring;
	uint16_t mask = txq->nb_tx_desc - 1;
	uint16_t tx_id = txq->tx_tail & mask;
	uint32_t tx_tail = txq->tx_tail;
	struct rte_mbuf *tx_pkt, *first;
	uint16_t nb_used, hlen, i;
	uint64_t ol_flags, addr;
	uint16_t nb_tx = 0;

	txr = txq->tx_desc_ring;

	if (txq->nb_free < txq->free_thresh)
		gve_tx_clean(txq);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		tx_pkt = *tx_pkts++;
		ol_flags = tx_pkt->ol_flags;

		nb_used = tx_pkt->nb_segs;
		if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
			nb_used++;

		if (txq->nb_free < nb_used)
			goto end_of_tx;

		tx_offload.l2_len = tx_pkt->l2_len;
		tx_offload.l3_len = tx_pkt->l3_len;
		tx_offload.l4_len = tx_pkt->l4_len;
		tx_offload.tso_segsz = tx_pkt->tso_segsz;

		first = tx_pkt;
		txd = &txr[tx_id];

		hlen = ol_flags & RTE_MBUF_F_TX_TCP_SEG ?
			(uint32_t)(tx_offload.l2_len + tx_offload.l3_len + tx_offload.l4_len) :
			tx_pkt->pkt_len;
		/*
		 * if tso, the driver needs to fill 2 descs for 1 mbuf
		 * so only put this mbuf into the 1st tx entry in sw ring
		 */
		sw_ring[tx_id] = tx_pkt;
		addr = rte_mbuf_data_iova(tx_pkt);
		gve_tx_fill_pkt_desc(txd, tx_pkt, nb_used, hlen, addr);

		if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
			tx_id = (tx_id + 1) & mask;
			txd = &txr[tx_id];
			addr = rte_mbuf_data_iova(tx_pkt) + hlen;
			gve_tx_fill_seg_desc(txd, ol_flags, tx_offload,
					     tx_pkt->data_len - hlen, addr);
		}

		for (i = 1; i < first->nb_segs; i++) {
			tx_id = (tx_id + 1) & mask;
			txd = &txr[tx_id];
			tx_pkt = tx_pkt->next;
			sw_ring[tx_id] = tx_pkt;
			addr = rte_mbuf_data_iova(tx_pkt);
			gve_tx_fill_seg_desc(txd, ol_flags, tx_offload,
					     tx_pkt->data_len, addr);
		}
		tx_id = (tx_id + 1) & mask;

		txq->nb_free -= nb_used;
		tx_tail += nb_used;
	}

end_of_tx:
	if (nb_tx) {
		rte_write32(rte_cpu_to_be_32(tx_tail), txq->qtx_tail);
		txq->tx_tail = tx_tail;
	}

	return nb_tx;
}
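
/* Dispatch to the QPL or raw-addressing burst routine based on the queue
 * format.
 */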
uint16_t
gve_tx_burst(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct gve_tx_queue *txq = tx_queue;

	if (txq->is_gqi_qpl)
		return gve_tx_burst_qpl(tx_queue, tx_pkts, nb_pkts);

	return gve_tx_burst_ra(tx_queue, tx_pkts, nb_pkts);
}
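
/* Reset a TX queue to its initial state: zero the descriptor ring, clear the
 * software ring, and rewind the FIFO bookkeeping for the QPL format.
 */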
static inline void
gve_reset_txq(struct gve_tx_queue *txq)
{
	struct rte_mbuf **sw_ring;
	uint32_t size, i;

	if (txq == NULL) {
		PMD_DRV_LOG(ERR, "Pointer to txq is NULL");
		return;
	}

	size = txq->nb_tx_desc * sizeof(union gve_tx_desc);
	for (i = 0; i < size; i++)
		((volatile char *)txq->tx_desc_ring)[i] = 0;

	sw_ring = txq->sw_ring;
	for (i = 0; i < txq->nb_tx_desc; i++) {
		sw_ring[i] = NULL;
		if (txq->is_gqi_qpl) {
			txq->iov_ring[i].iov_base = 0;
			txq->iov_ring[i].iov_len = 0;
		}
	}

	txq->tx_tail = 0;
	txq->nb_free = txq->nb_tx_desc - 1;
	txq->next_to_clean = 0;

	if (txq->is_gqi_qpl) {
		txq->fifo_size = PAGE_SIZE * txq->hw->tx_pages_per_qpl;
		txq->fifo_avail = txq->fifo_size;
		txq->fifo_head = 0;
		txq->fifo_base = (uint64_t)(txq->qpl->mz->addr);

		txq->sw_tail = 0;
		txq->sw_nb_free = txq->nb_tx_desc - 1;
		txq->sw_ntc = 0;
	}
}
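
/* Free any mbufs still referenced by the software ring. */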
static inline void
gve_release_txq_mbufs(struct gve_tx_queue *txq)
{
	uint16_t i;

	for (i = 0; i < txq->nb_tx_desc; i++) {
		if (txq->sw_ring[i]) {
			rte_pktmbuf_free_seg(txq->sw_ring[i]);
			txq->sw_ring[i] = NULL;
		}
	}
}
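
/* tx_queue_release dev op: unregister the page list (QPL format), free the
 * mbufs, rings and DMA zones, then free the queue structure itself.
 */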
void
gve_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct gve_tx_queue *q = dev->data->tx_queues[qid];

	if (!q)
		return;

	if (q->is_gqi_qpl) {
		gve_adminq_unregister_page_list(q->hw, q->qpl->id);
		rte_free(q->iov_ring);
		q->qpl = NULL;
	}

	gve_release_txq_mbufs(q);
	rte_free(q->sw_ring);
	rte_memzone_free(q->mz);
	rte_memzone_free(q->qres_mz);
	q->qres = NULL;
	rte_free(q);
}
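
/* tx_queue_setup dev op: allocate the queue structure, software ring,
 * descriptor ring and queue resources, register the QPL when needed, and
 * reset the queue.
 */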
int
gve_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id, uint16_t nb_desc,
		   unsigned int socket_id, const struct rte_eth_txconf *conf)
{
	struct gve_priv *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct gve_tx_queue *txq;
	uint16_t free_thresh;
	int err = 0;

	if (nb_desc != hw->tx_desc_cnt) {
		PMD_DRV_LOG(WARNING, "gve doesn't support nb_desc config, use hw nb_desc %u.",
			    hw->tx_desc_cnt);
	}
	nb_desc = hw->tx_desc_cnt;

	/* Free memory if needed. */
	if (dev->data->tx_queues[queue_id]) {
		gve_tx_queue_release(dev, queue_id);
		dev->data->tx_queues[queue_id] = NULL;
	}

	/* Allocate the TX queue data structure. */
	txq = rte_zmalloc_socket("gve txq", sizeof(struct gve_tx_queue),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (!txq) {
		PMD_DRV_LOG(ERR, "Failed to allocate memory for tx queue structure");
		err = -ENOMEM;
		goto err_txq;
	}

	free_thresh = conf->tx_free_thresh ? conf->tx_free_thresh : GVE_DEFAULT_TX_FREE_THRESH;
	if (free_thresh >= nb_desc - 3) {
		PMD_DRV_LOG(ERR, "tx_free_thresh (%u) must be less than nb_desc (%u) minus 3.",
			    free_thresh, nb_desc);
		err = -EINVAL;
		goto err_txq;
	}

	txq->nb_tx_desc = nb_desc;
	txq->free_thresh = free_thresh;
	txq->queue_id = queue_id;
	txq->port_id = dev->data->port_id;
	txq->ntfy_id = queue_id;
	txq->is_gqi_qpl = hw->queue_format == GVE_GQI_QPL_FORMAT;
	txq->hw = hw;
	txq->ntfy_addr = &hw->db_bar2[rte_be_to_cpu_32(hw->irq_dbs[txq->ntfy_id].id)];

	/* Allocate software ring */
	txq->sw_ring = rte_zmalloc_socket("gve tx sw ring",
					  sizeof(struct rte_mbuf *) * nb_desc,
					  RTE_CACHE_LINE_SIZE, socket_id);
	if (!txq->sw_ring) {
		PMD_DRV_LOG(ERR, "Failed to allocate memory for SW TX ring");
		err = -ENOMEM;
		goto err_txq;
	}

	mz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_id,
				      nb_desc * sizeof(union gve_tx_desc),
				      PAGE_SIZE, socket_id);
	if (mz == NULL) {
		PMD_DRV_LOG(ERR, "Failed to reserve DMA memory for TX");
		err = -ENOMEM;
		goto err_sw_ring;
	}
	txq->tx_desc_ring = (union gve_tx_desc *)mz->addr;
	txq->tx_ring_phys_addr = mz->iova;
	txq->mz = mz;

	if (txq->is_gqi_qpl) {
		txq->iov_ring = rte_zmalloc_socket("gve tx iov ring",
						   sizeof(struct gve_tx_iovec) * nb_desc,
						   RTE_CACHE_LINE_SIZE, socket_id);
		if (!txq->iov_ring) {
			PMD_DRV_LOG(ERR, "Failed to allocate memory for TX IOV ring");
			err = -ENOMEM;
			goto err_tx_ring;
		}
		txq->qpl = &hw->qpl[queue_id];
		err = gve_adminq_register_page_list(hw, txq->qpl);
		if (err != 0) {
			PMD_DRV_LOG(ERR, "Failed to register qpl %u", queue_id);
			goto err_iov_ring;
		}
	}

	mz = rte_eth_dma_zone_reserve(dev, "txq_res", queue_id, sizeof(struct gve_queue_resources),
				      PAGE_SIZE, socket_id);
	if (mz == NULL) {
		PMD_DRV_LOG(ERR, "Failed to reserve DMA memory for TX resource");
		err = -ENOMEM;
		goto err_iov_ring;
	}
	txq->qres = (struct gve_queue_resources *)mz->addr;
	txq->qres_mz = mz;

	gve_reset_txq(txq);

	dev->data->tx_queues[queue_id] = txq;

	return 0;

err_iov_ring:
	if (txq->is_gqi_qpl)
		rte_free(txq->iov_ring);
err_tx_ring:
	rte_memzone_free(txq->mz);
err_sw_ring:
	rte_free(txq->sw_ring);
err_txq:
	rte_free(txq);
	return err;
}
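
/* Destroy the TX queues through the admin queue, then release their mbufs
 * and software state.
 */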
void
gve_stop_tx_queues(struct rte_eth_dev *dev)
{
	struct gve_priv *hw = dev->data->dev_private;
	struct gve_tx_queue *txq;
	uint16_t i;
	int err;

	err = gve_adminq_destroy_tx_queues(hw, dev->data->nb_tx_queues);
	if (err != 0)
		PMD_DRV_LOG(WARNING, "failed to destroy txqs");

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];
		gve_release_txq_mbufs(txq);
		gve_reset_txq(txq);
	}
}