numam-dpdk/drivers/net/sfc/sfc_tx.c
Ivan Malov 91d1627654 net/sfc: prepare to merge init logs with main log type
The conversion to dynamic logging done in the previous patches
makes it possible to simplify the internal controls for init
logging. This patch prepares for that change: it switches
init-unrelated messages to the NOTICE level so that the
following patch can convert init logging to the INFO level
and keep it silent by default.

Signed-off-by: Ivan Malov <ivan.malov@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <arybchenko@solarflare.com>
Reviewed-by: Andy Moreton <amoreton@solarflare.com>
2018-03-30 14:08:44 +02:00

/* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2016-2018 Solarflare Communications Inc.
* All rights reserved.
*
* This software was jointly developed between OKTET Labs (under contract
* for Solarflare) and Solarflare Communications, Inc.
*/
#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_tx.h"
#include "sfc_tweak.h"
#include "sfc_kvargs.h"
/*
* Maximum number of TX queue flush attempts in case of
* failure or flush timeout
*/
#define SFC_TX_QFLUSH_ATTEMPTS (3)
/*
* Time to wait between event queue polling attempts when waiting for TX
* queue flush done or flush failed events
*/
#define SFC_TX_QFLUSH_POLL_WAIT_MS (1)
/*
* Maximum number of event queue polling attempts when waiting for TX queue
* flush done or flush failed events; it defines TX queue flush attempt timeout
* together with SFC_TX_QFLUSH_POLL_WAIT_MS
*/
#define SFC_TX_QFLUSH_POLL_ATTEMPTS (2000)
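/*
* Report device-level Tx offload capabilities: VLAN insertion needs
* both datapath support and hardware support, multi-segment transmit
* needs datapath support, and mbuf fast free is reported only if the
* datapath supports neither multiple mempools nor reference-counted
* mbufs.
*/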
uint64_t
sfc_tx_get_dev_offload_caps(struct sfc_adapter *sa)
{
const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
uint64_t caps = 0;
if ((sa->dp_tx->features & SFC_DP_TX_FEAT_VLAN_INSERT) &&
encp->enc_hw_tx_insert_vlan_enabled)
caps |= DEV_TX_OFFLOAD_VLAN_INSERT;
if (sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_SEG)
caps |= DEV_TX_OFFLOAD_MULTI_SEGS;
if ((~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_POOL) &&
(~sa->dp_tx->features & SFC_DP_TX_FEAT_REFCNT))
caps |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
return caps;
}
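/*
* Report per-queue Tx offload capabilities: IPv4/UDP/TCP checksum
* offloads are always available, outer IPv4 checksum requires tunnel
* encapsulation support in the NIC, and TSO availability follows the
* adapter-level TSO state.
*/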
uint64_t
sfc_tx_get_queue_offload_caps(struct sfc_adapter *sa)
{
const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
uint64_t caps = 0;
caps |= DEV_TX_OFFLOAD_IPV4_CKSUM;
caps |= DEV_TX_OFFLOAD_UDP_CKSUM;
caps |= DEV_TX_OFFLOAD_TCP_CKSUM;
if (encp->enc_tunnel_encapsulations_supported)
caps |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
if (sa->tso)
caps |= DEV_TX_OFFLOAD_TCP_TSO;
return caps;
}
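/*
* Log every offload bit set in 'offloads' by its symbolic name
* together with the given verdict (e.g. "is unsupported").
*/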
static void
sfc_tx_log_offloads(struct sfc_adapter *sa, const char *offload_group,
const char *verdict, uint64_t offloads)
{
unsigned long long bit;
while ((bit = __builtin_ffsll(offloads)) != 0) {
uint64_t flag = (1ULL << --bit);
sfc_err(sa, "Tx %s offload %s %s", offload_group,
rte_eth_dev_tx_offload_name(flag), verdict);
offloads &= ~flag;
}
}
static int
sfc_tx_queue_offload_mismatch(struct sfc_adapter *sa, uint64_t requested)
{
uint64_t mandatory = sa->eth_dev->data->dev_conf.txmode.offloads;
uint64_t supported = sfc_tx_get_dev_offload_caps(sa) |
sfc_tx_get_queue_offload_caps(sa);
uint64_t rejected = requested & ~supported;
uint64_t missing = (requested & mandatory) ^ mandatory;
boolean_t mismatch = B_FALSE;
if (rejected) {
sfc_tx_log_offloads(sa, "queue", "is unsupported", rejected);
mismatch = B_TRUE;
}
if (missing) {
sfc_tx_log_offloads(sa, "queue", "must be set", missing);
mismatch = B_TRUE;
}
return mismatch;
}
static int
sfc_tx_qcheck_conf(struct sfc_adapter *sa, unsigned int txq_max_fill_level,
const struct rte_eth_txconf *tx_conf)
{
int rc = 0;
if (tx_conf->tx_rs_thresh != 0) {
sfc_err(sa, "RS bit in transmit descriptor is not supported");
rc = EINVAL;
}
if (tx_conf->tx_free_thresh > txq_max_fill_level) {
sfc_err(sa,
"TxQ free threshold too large: %u vs maximum %u",
tx_conf->tx_free_thresh, txq_max_fill_level);
rc = EINVAL;
}
if (tx_conf->tx_thresh.pthresh != 0 ||
tx_conf->tx_thresh.hthresh != 0 ||
tx_conf->tx_thresh.wthresh != 0) {
sfc_warn(sa,
"prefetch/host/writeback thresholds are not supported");
}
/* We either perform both TCP and UDP offload, or no offload at all */
if (((tx_conf->offloads & DEV_TX_OFFLOAD_TCP_CKSUM) == 0) !=
((tx_conf->offloads & DEV_TX_OFFLOAD_UDP_CKSUM) == 0)) {
sfc_err(sa, "TCP and UDP offloads can't be set independently");
rc = EINVAL;
}
if (sfc_tx_queue_offload_mismatch(sa, tx_conf->offloads))
rc = EINVAL;
return rc;
}
void
sfc_tx_qflush_done(struct sfc_txq *txq)
{
txq->state |= SFC_TXQ_FLUSHED;
txq->state &= ~SFC_TXQ_FLUSHING;
}
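/*
* Initialise a Tx queue: size the rings via the datapath callback,
* validate the configuration, create the paired event queue, allocate
* the control path TxQ and its DMA ring and, finally, create the
* datapath TxQ on top of them.
*/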
int
sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
uint16_t nb_tx_desc, unsigned int socket_id,
const struct rte_eth_txconf *tx_conf)
{
const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
unsigned int txq_entries;
unsigned int evq_entries;
unsigned int txq_max_fill_level;
struct sfc_txq_info *txq_info;
struct sfc_evq *evq;
struct sfc_txq *txq;
int rc = 0;
struct sfc_dp_tx_qcreate_info info;
sfc_log_init(sa, "TxQ = %u", sw_index);
rc = sa->dp_tx->qsize_up_rings(nb_tx_desc, &txq_entries, &evq_entries,
&txq_max_fill_level);
if (rc != 0)
goto fail_size_up_rings;
SFC_ASSERT(txq_entries >= EFX_TXQ_MINNDESCS);
SFC_ASSERT(txq_entries <= sa->txq_max_entries);
SFC_ASSERT(txq_entries >= nb_tx_desc);
SFC_ASSERT(txq_max_fill_level <= nb_tx_desc);
rc = sfc_tx_qcheck_conf(sa, txq_max_fill_level, tx_conf);
if (rc != 0)
goto fail_bad_conf;
SFC_ASSERT(sw_index < sa->txq_count);
txq_info = &sa->txq_info[sw_index];
txq_info->entries = txq_entries;
rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_TX, sw_index,
evq_entries, socket_id, &evq);
if (rc != 0)
goto fail_ev_qinit;
rc = ENOMEM;
txq = rte_zmalloc_socket("sfc-txq", sizeof(*txq), 0, socket_id);
if (txq == NULL)
goto fail_txq_alloc;
txq_info->txq = txq;
txq->hw_index = sw_index;
txq->evq = evq;
txq->free_thresh =
(tx_conf->tx_free_thresh) ? tx_conf->tx_free_thresh :
SFC_TX_DEFAULT_FREE_THRESH;
txq->flags = tx_conf->txq_flags;
txq->offloads = tx_conf->offloads;
rc = sfc_dma_alloc(sa, "txq", sw_index, EFX_TXQ_SIZE(txq_info->entries),
socket_id, &txq->mem);
if (rc != 0)
goto fail_dma_alloc;
memset(&info, 0, sizeof(info));
info.max_fill_level = txq_max_fill_level;
info.free_thresh = txq->free_thresh;
info.flags = tx_conf->txq_flags;
info.offloads = tx_conf->offloads;
info.txq_entries = txq_info->entries;
info.dma_desc_size_max = encp->enc_tx_dma_desc_size_max;
info.txq_hw_ring = txq->mem.esm_base;
info.evq_entries = evq_entries;
info.evq_hw_ring = evq->mem.esm_base;
info.hw_index = txq->hw_index;
info.mem_bar = sa->mem_bar.esb_base;
rc = sa->dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index,
&RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
socket_id, &info, &txq->dp);
if (rc != 0)
goto fail_dp_tx_qinit;
evq->dp_txq = txq->dp;
txq->state = SFC_TXQ_INITIALIZED;
txq_info->deferred_start = (tx_conf->tx_deferred_start != 0);
return 0;
fail_dp_tx_qinit:
sfc_dma_free(sa, &txq->mem);
fail_dma_alloc:
txq_info->txq = NULL;
rte_free(txq);
fail_txq_alloc:
sfc_ev_qfini(evq);
fail_ev_qinit:
txq_info->entries = 0;
fail_bad_conf:
fail_size_up_rings:
sfc_log_init(sa, "failed (TxQ = %u, rc = %d)", sw_index, rc);
return rc;
}
void
sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
{
struct sfc_txq_info *txq_info;
struct sfc_txq *txq;
sfc_log_init(sa, "TxQ = %u", sw_index);
SFC_ASSERT(sw_index < sa->txq_count);
txq_info = &sa->txq_info[sw_index];
txq = txq_info->txq;
SFC_ASSERT(txq != NULL);
SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED);
sa->dp_tx->qdestroy(txq->dp);
txq->dp = NULL;
txq_info->txq = NULL;
txq_info->entries = 0;
sfc_dma_free(sa, &txq->mem);
sfc_ev_qfini(txq->evq);
txq->evq = NULL;
rte_free(txq);
}
static int
sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
{
sfc_log_init(sa, "TxQ = %u", sw_index);
return 0;
}
static int
sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode)
{
uint64_t offloads_supported = sfc_tx_get_dev_offload_caps(sa) |
sfc_tx_get_queue_offload_caps(sa);
uint64_t offloads_rejected = txmode->offloads & ~offloads_supported;
int rc = 0;
switch (txmode->mq_mode) {
case ETH_MQ_TX_NONE:
break;
default:
sfc_err(sa, "Tx multi-queue mode %u not supported",
txmode->mq_mode);
rc = EINVAL;
}
/*
* These features are claimed to be i40e-specific,
* but it does make sense to double-check their absence
*/
if (txmode->hw_vlan_reject_tagged) {
sfc_err(sa, "Rejecting tagged packets not supported");
rc = EINVAL;
}
if (txmode->hw_vlan_reject_untagged) {
sfc_err(sa, "Rejecting untagged packets not supported");
rc = EINVAL;
}
if (txmode->hw_vlan_insert_pvid) {
sfc_err(sa, "Port-based VLAN insertion not supported");
rc = EINVAL;
}
if (offloads_rejected) {
sfc_tx_log_offloads(sa, "device", "is unsupported",
offloads_rejected);
rc = EINVAL;
}
return rc;
}
/**
* Destroy excess queues that are no longer needed after reconfiguration
* or complete close.
*/
static void
sfc_tx_fini_queues(struct sfc_adapter *sa, unsigned int nb_tx_queues)
{
int sw_index;
SFC_ASSERT(nb_tx_queues <= sa->txq_count);
sw_index = sa->txq_count;
while (--sw_index >= (int)nb_tx_queues) {
if (sa->txq_info[sw_index].txq != NULL)
sfc_tx_qfini(sa, sw_index);
}
sa->txq_count = nb_tx_queues;
}
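/*
* (Re)configure device-level Tx state: reject adapters which impose a
* Tx DMA descriptor boundary, validate the Tx mode and grow or shrink
* the txq_info array to match the requested number of Tx queues.
*/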
int
sfc_tx_configure(struct sfc_adapter *sa)
{
const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
const struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
const unsigned int nb_tx_queues = sa->eth_dev->data->nb_tx_queues;
int rc = 0;
sfc_log_init(sa, "nb_tx_queues=%u (old %u)",
nb_tx_queues, sa->txq_count);
/*
* The datapath implementation assumes the absence of boundary
* limits on Tx DMA descriptors. Adding such checks to the
* datapath would simply make it slower.
*/
if (encp->enc_tx_dma_desc_boundary != 0) {
rc = ENOTSUP;
goto fail_tx_dma_desc_boundary;
}
rc = sfc_tx_check_mode(sa, &dev_conf->txmode);
if (rc != 0)
goto fail_check_mode;
if (nb_tx_queues == sa->txq_count)
goto done;
if (sa->txq_info == NULL) {
sa->txq_info = rte_calloc_socket("sfc-txqs", nb_tx_queues,
sizeof(sa->txq_info[0]), 0,
sa->socket_id);
if (sa->txq_info == NULL)
goto fail_txqs_alloc;
} else {
struct sfc_txq_info *new_txq_info;
if (nb_tx_queues < sa->txq_count)
sfc_tx_fini_queues(sa, nb_tx_queues);
new_txq_info =
rte_realloc(sa->txq_info,
nb_tx_queues * sizeof(sa->txq_info[0]), 0);
if (new_txq_info == NULL && nb_tx_queues > 0)
goto fail_txqs_realloc;
sa->txq_info = new_txq_info;
if (nb_tx_queues > sa->txq_count)
memset(&sa->txq_info[sa->txq_count], 0,
(nb_tx_queues - sa->txq_count) *
sizeof(sa->txq_info[0]));
}
while (sa->txq_count < nb_tx_queues) {
rc = sfc_tx_qinit_info(sa, sa->txq_count);
if (rc != 0)
goto fail_tx_qinit_info;
sa->txq_count++;
}
done:
return 0;
fail_tx_qinit_info:
fail_txqs_realloc:
fail_txqs_alloc:
sfc_tx_close(sa);
fail_check_mode:
fail_tx_dma_desc_boundary:
sfc_log_init(sa, "failed (rc = %d)", rc);
return rc;
}
void
sfc_tx_close(struct sfc_adapter *sa)
{
sfc_tx_fini_queues(sa, 0);
rte_free(sa->txq_info);
sa->txq_info = NULL;
}
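/*
* Start a Tx queue: start the paired event queue, derive EFX checksum
* and TSO flags from the queue offloads (honouring legacy txq_flags
* when ETH_TXQ_FLAGS_IGNORE is not set), create and enable the common
* TxQ and, finally, start the datapath TxQ.
*/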
int
sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
{
uint64_t offloads_supported = sfc_tx_get_dev_offload_caps(sa) |
sfc_tx_get_queue_offload_caps(sa);
struct rte_eth_dev_data *dev_data;
struct sfc_txq_info *txq_info;
struct sfc_txq *txq;
struct sfc_evq *evq;
uint16_t flags = 0;
unsigned int desc_index;
int rc = 0;
sfc_log_init(sa, "TxQ = %u", sw_index);
SFC_ASSERT(sw_index < sa->txq_count);
txq_info = &sa->txq_info[sw_index];
txq = txq_info->txq;
SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED);
evq = txq->evq;
rc = sfc_ev_qstart(evq, sfc_evq_index_by_txq_sw_index(sa, sw_index));
if (rc != 0)
goto fail_ev_qstart;
/*
* The absence of ETH_TXQ_FLAGS_IGNORE is associated with a legacy
* application which expects that IPv4 checksum offload is enabled
* all the time as there is no legacy flag to turn off the offload.
*/
if ((txq->offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) ||
(~txq->flags & ETH_TXQ_FLAGS_IGNORE))
flags |= EFX_TXQ_CKSUM_IPV4;
if ((txq->offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) ||
((~txq->flags & ETH_TXQ_FLAGS_IGNORE) &&
(offloads_supported & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)))
flags |= EFX_TXQ_CKSUM_INNER_IPV4;
if ((txq->offloads & DEV_TX_OFFLOAD_TCP_CKSUM) ||
(txq->offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) {
flags |= EFX_TXQ_CKSUM_TCPUDP;
if ((~txq->flags & ETH_TXQ_FLAGS_IGNORE) &&
(offloads_supported & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM))
flags |= EFX_TXQ_CKSUM_INNER_TCPUDP;
}
/*
* The absence of ETH_TXQ_FLAGS_IGNORE is associated with a legacy
* application. In turn, the absence of ETH_TXQ_FLAGS_NOXSUMTCP is
* associated specifically with a legacy application which expects
* both TCP checksum offload and TSO to be enabled because the legacy
* API does not provide a dedicated mechanism to control TSO.
*/
if ((txq->offloads & DEV_TX_OFFLOAD_TCP_TSO) ||
((~txq->flags & ETH_TXQ_FLAGS_IGNORE) &&
(~txq->flags & ETH_TXQ_FLAGS_NOXSUMTCP)))
flags |= EFX_TXQ_FATSOV2;
rc = efx_tx_qcreate(sa->nic, sw_index, 0, &txq->mem,
txq_info->entries, 0 /* not used on EF10 */,
flags, evq->common,
&txq->common, &desc_index);
if (rc != 0) {
if (sa->tso && (rc == ENOSPC))
sfc_err(sa, "ran out of TSO contexts");
goto fail_tx_qcreate;
}
efx_tx_qenable(txq->common);
txq->state |= SFC_TXQ_STARTED;
rc = sa->dp_tx->qstart(txq->dp, evq->read_ptr, desc_index);
if (rc != 0)
goto fail_dp_qstart;
/*
* The queue state seems to be used by DPDK ('rte_ether') for debug purposes only
*/
dev_data = sa->eth_dev->data;
dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STARTED;
return 0;
fail_dp_qstart:
txq->state = SFC_TXQ_INITIALIZED;
efx_tx_qdestroy(txq->common);
fail_tx_qcreate:
sfc_ev_qstop(evq);
fail_ev_qstart:
return rc;
}
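/*
* Stop a Tx queue: stop the datapath TxQ, flush the hardware queue
* (with retries), reap the remaining descriptors, destroy the common
* TxQ and stop the paired event queue.
*/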
void
sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
{
struct rte_eth_dev_data *dev_data;
struct sfc_txq_info *txq_info;
struct sfc_txq *txq;
unsigned int retry_count;
unsigned int wait_count;
int rc;
sfc_log_init(sa, "TxQ = %u", sw_index);
SFC_ASSERT(sw_index < sa->txq_count);
txq_info = &sa->txq_info[sw_index];
txq = txq_info->txq;
if (txq->state == SFC_TXQ_INITIALIZED)
return;
SFC_ASSERT(txq->state & SFC_TXQ_STARTED);
sa->dp_tx->qstop(txq->dp, &txq->evq->read_ptr);
/*
* Retry TX queue flushing in the case of flush failure or
* timeout; in the worst case it can take up to 6 seconds
*/
for (retry_count = 0;
((txq->state & SFC_TXQ_FLUSHED) == 0) &&
(retry_count < SFC_TX_QFLUSH_ATTEMPTS);
++retry_count) {
rc = efx_tx_qflush(txq->common);
if (rc != 0) {
txq->state |= (rc == EALREADY) ?
SFC_TXQ_FLUSHED : SFC_TXQ_FLUSH_FAILED;
break;
}
/*
* Wait for TX queue flush done or flush failed event at least
* SFC_TX_QFLUSH_POLL_WAIT_MS milliseconds and not more
* than 2 seconds (SFC_TX_QFLUSH_POLL_WAIT_MS multiplied
* by SFC_TX_QFLUSH_POLL_ATTEMPTS)
*/
wait_count = 0;
do {
rte_delay_ms(SFC_TX_QFLUSH_POLL_WAIT_MS);
sfc_ev_qpoll(txq->evq);
} while ((txq->state & SFC_TXQ_FLUSHING) &&
wait_count++ < SFC_TX_QFLUSH_POLL_ATTEMPTS);
if (txq->state & SFC_TXQ_FLUSHING)
sfc_err(sa, "TxQ %u flush timed out", sw_index);
if (txq->state & SFC_TXQ_FLUSHED)
sfc_notice(sa, "TxQ %u flushed", sw_index);
}
sa->dp_tx->qreap(txq->dp);
txq->state = SFC_TXQ_INITIALIZED;
efx_tx_qdestroy(txq->common);
sfc_ev_qstop(txq->evq);
/*
* The queue state seems to be used by DPDK ('rte_ether') for debug purposes only
*/
dev_data = sa->eth_dev->data;
dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STOPPED;
}
int
sfc_tx_start(struct sfc_adapter *sa)
{
unsigned int sw_index;
int rc = 0;
sfc_log_init(sa, "txq_count = %u", sa->txq_count);
if (sa->tso) {
if (!efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_enabled) {
sfc_warn(sa, "TSO support was unable to be restored");
sa->tso = B_FALSE;
}
}
rc = efx_tx_init(sa->nic);
if (rc != 0)
goto fail_efx_tx_init;
for (sw_index = 0; sw_index < sa->txq_count; ++sw_index) {
if (!(sa->txq_info[sw_index].deferred_start) ||
sa->txq_info[sw_index].deferred_started) {
rc = sfc_tx_qstart(sa, sw_index);
if (rc != 0)
goto fail_tx_qstart;
}
}
return 0;
fail_tx_qstart:
while (sw_index-- > 0)
sfc_tx_qstop(sa, sw_index);
efx_tx_fini(sa->nic);
fail_efx_tx_init:
sfc_log_init(sa, "failed (rc = %d)", rc);
return rc;
}
void
sfc_tx_stop(struct sfc_adapter *sa)
{
unsigned int sw_index;
sfc_log_init(sa, "txq_count = %u", sa->txq_count);
sw_index = sa->txq_count;
while (sw_index-- > 0) {
if (sa->txq_info[sw_index].txq != NULL)
sfc_tx_qstop(sa, sw_index);
}
efx_tx_fini(sa->nic);
}
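/*
* Poll the event queue to update 'txq->pending' and free the mbufs of
* all descriptors completed since the previous reap.
*/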
static void
sfc_efx_tx_reap(struct sfc_efx_txq *txq)
{
unsigned int completed;
sfc_ev_qpoll(txq->evq);
for (completed = txq->completed;
completed != txq->pending; completed++) {
struct sfc_efx_tx_sw_desc *txd;
txd = &txq->sw_ring[completed & txq->ptr_mask];
if (txd->mbuf != NULL) {
rte_pktmbuf_free(txd->mbuf);
txd->mbuf = NULL;
}
}
txq->completed = completed;
}
/*
* The function is used to insert or update a VLAN tag;
* the firmware keeps per-TxQ state of the tag to insert
* (controlled by option descriptors), hence, if the tag of the
* packet to be sent differs from the one remembered by the firmware,
* the function will update it
*/
static unsigned int
sfc_efx_tx_maybe_insert_tag(struct sfc_efx_txq *txq, struct rte_mbuf *m,
efx_desc_t **pend)
{
uint16_t this_tag = ((m->ol_flags & PKT_TX_VLAN_PKT) ?
m->vlan_tci : 0);
if (this_tag == txq->hw_vlan_tci)
return 0;
/*
* The expression inside SFC_ASSERT() is deliberately not checked in
* a non-debug build because it might be too expensive on the data path
*/
SFC_ASSERT(efx_nic_cfg_get(txq->evq->sa->nic)->enc_hw_tx_insert_vlan_enabled);
efx_tx_qdesc_vlantci_create(txq->common, rte_cpu_to_be_16(this_tag),
*pend);
(*pend)++;
txq->hw_vlan_tci = this_tag;
return 1;
}
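/*
* libefx-based transmit burst callback: reap completed descriptors when
* the fill level requires it, build option and DMA descriptors for each
* packet (VLAN insertion, TSO, one descriptor per DMA-size-limited
* fragment) and push the new descriptors to hardware with a single
* doorbell write.
*/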
static uint16_t
sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
struct sfc_dp_txq *dp_txq = (struct sfc_dp_txq *)tx_queue;
struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
unsigned int added = txq->added;
unsigned int pushed = added;
unsigned int pkts_sent = 0;
efx_desc_t *pend = &txq->pend_desc[0];
const unsigned int hard_max_fill = txq->max_fill_level;
const unsigned int soft_max_fill = hard_max_fill - txq->free_thresh;
unsigned int fill_level = added - txq->completed;
boolean_t reap_done;
int rc __rte_unused;
struct rte_mbuf **pktp;
if (unlikely((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 0))
goto done;
/*
* If there is insufficient space even for a single packet, we
* should reap; otherwise, we should not do that on every call
* to avoid increasing latency
*/
reap_done = (fill_level > soft_max_fill);
if (reap_done) {
sfc_efx_tx_reap(txq);
/*
* Recalculate fill level since 'txq->completed'
* might have changed on reap
*/
fill_level = added - txq->completed;
}
for (pkts_sent = 0, pktp = &tx_pkts[0];
(pkts_sent < nb_pkts) && (fill_level <= soft_max_fill);
pkts_sent++, pktp++) {
struct rte_mbuf *m_seg = *pktp;
size_t pkt_len = m_seg->pkt_len;
unsigned int pkt_descs = 0;
size_t in_off = 0;
/*
* Here VLAN TCI is expected to be zero if the
* DEV_TX_OFFLOAD_VLAN_INSERT capability is not advertised;
* if the calling application ignores the absence of
* DEV_TX_OFFLOAD_VLAN_INSERT and pushes a VLAN TCI anyway,
* a TX_ERROR will occur
*/
pkt_descs += sfc_efx_tx_maybe_insert_tag(txq, m_seg, &pend);
if (m_seg->ol_flags & PKT_TX_TCP_SEG) {
/*
* We expect the caller to have set correct
* 'pkt->l[2, 3, 4]_len' values
*/
if (sfc_efx_tso_do(txq, added, &m_seg, &in_off, &pend,
&pkt_descs, &pkt_len) != 0) {
/* We may have reached this place for
* one of the following reasons:
*
* 1) Packet header length is greater
* than SFC_TSOH_STD_LEN
* 2) TCP header starts more than
* 208 bytes into the frame
*
* We will deceive RTE into thinking that we
* have sent the packet, but we will actually
* drop it. Hence, we should revert 'pend' to
* its previous state (in case we have added a
* VLAN descriptor) and move on to the next
* packet. But the original mbuf must not be
* orphaned; it is freed right below
*/
pend -= pkt_descs;
rte_pktmbuf_free(*pktp);
continue;
}
/*
* We've only added 2 FATSOv2 option descriptors
* and 1 descriptor for the linearized packet header.
* The remaining work will be done in the same manner
* as for the usual non-TSO path
*/
}
for (; m_seg != NULL; m_seg = m_seg->next) {
efsys_dma_addr_t next_frag;
size_t seg_len;
seg_len = m_seg->data_len;
next_frag = rte_mbuf_data_iova(m_seg);
/*
* If we've started a TSO transaction a few steps earlier,
* we'll skip the packet header using an offset into the
* current segment (which has been set to the
* first one containing payload)
*/
seg_len -= in_off;
next_frag += in_off;
in_off = 0;
do {
efsys_dma_addr_t frag_addr = next_frag;
size_t frag_len;
/*
* It is assumed here that there is no
* limitation on address boundary
* crossing by DMA descriptor.
*/
frag_len = MIN(seg_len, txq->dma_desc_size_max);
next_frag += frag_len;
seg_len -= frag_len;
pkt_len -= frag_len;
efx_tx_qdesc_dma_create(txq->common,
frag_addr, frag_len,
(pkt_len == 0),
pend++);
pkt_descs++;
} while (seg_len != 0);
}
added += pkt_descs;
fill_level += pkt_descs;
if (unlikely(fill_level > hard_max_fill)) {
/*
* Our estimate of the maximum number of descriptors
* required to send a packet seems to be wrong.
* Try to reap (if we haven't yet).
*/
if (!reap_done) {
sfc_efx_tx_reap(txq);
reap_done = B_TRUE;
fill_level = added - txq->completed;
if (fill_level > hard_max_fill) {
pend -= pkt_descs;
break;
}
} else {
pend -= pkt_descs;
break;
}
}
/* Assign mbuf to the last used desc */
txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp;
}
if (likely(pkts_sent > 0)) {
rc = efx_tx_qdesc_post(txq->common, txq->pend_desc,
pend - &txq->pend_desc[0],
txq->completed, &txq->added);
SFC_ASSERT(rc == 0);
if (likely(pushed != txq->added))
efx_tx_qpush(txq->common, txq->added, pushed);
}
#if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE
if (!reap_done)
sfc_efx_tx_reap(txq);
#endif
done:
return pkts_sent;
}
struct sfc_txq *
sfc_txq_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
const struct sfc_dp_queue *dpq = &dp_txq->dpq;
struct rte_eth_dev *eth_dev;
struct sfc_adapter *sa;
struct sfc_txq *txq;
SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
eth_dev = &rte_eth_devices[dpq->port_id];
sa = eth_dev->data->dev_private;
SFC_ASSERT(dpq->queue_id < sa->txq_count);
txq = sa->txq_info[dpq->queue_id].txq;
SFC_ASSERT(txq != NULL);
return txq;
}
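/*
* Callbacks which make up the libefx-based (EFX) Tx datapath follow.
* It uses equal TxQ and EvQ ring sizes and limits the maximum fill
* level in accordance with EFX_TXQ_LIMIT().
*/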
static sfc_dp_tx_qsize_up_rings_t sfc_efx_tx_qsize_up_rings;
static int
sfc_efx_tx_qsize_up_rings(uint16_t nb_tx_desc,
unsigned int *txq_entries,
unsigned int *evq_entries,
unsigned int *txq_max_fill_level)
{
*txq_entries = nb_tx_desc;
*evq_entries = nb_tx_desc;
*txq_max_fill_level = EFX_TXQ_LIMIT(*txq_entries);
return 0;
}
static sfc_dp_tx_qcreate_t sfc_efx_tx_qcreate;
static int
sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id,
const struct rte_pci_addr *pci_addr,
int socket_id,
const struct sfc_dp_tx_qcreate_info *info,
struct sfc_dp_txq **dp_txqp)
{
struct sfc_efx_txq *txq;
struct sfc_txq *ctrl_txq;
int rc;
rc = ENOMEM;
txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq),
RTE_CACHE_LINE_SIZE, socket_id);
if (txq == NULL)
goto fail_txq_alloc;
sfc_dp_queue_init(&txq->dp.dpq, port_id, queue_id, pci_addr);
rc = ENOMEM;
txq->pend_desc = rte_calloc_socket("sfc-efx-txq-pend-desc",
EFX_TXQ_LIMIT(info->txq_entries),
sizeof(*txq->pend_desc), 0,
socket_id);
if (txq->pend_desc == NULL)
goto fail_pend_desc_alloc;
rc = ENOMEM;
txq->sw_ring = rte_calloc_socket("sfc-efx-txq-sw_ring",
info->txq_entries,
sizeof(*txq->sw_ring),
RTE_CACHE_LINE_SIZE, socket_id);
if (txq->sw_ring == NULL)
goto fail_sw_ring_alloc;
ctrl_txq = sfc_txq_by_dp_txq(&txq->dp);
if (ctrl_txq->evq->sa->tso) {
rc = sfc_efx_tso_alloc_tsoh_objs(txq->sw_ring,
info->txq_entries, socket_id);
if (rc != 0)
goto fail_alloc_tsoh_objs;
}
txq->evq = ctrl_txq->evq;
txq->ptr_mask = info->txq_entries - 1;
txq->max_fill_level = info->max_fill_level;
txq->free_thresh = info->free_thresh;
txq->dma_desc_size_max = info->dma_desc_size_max;
*dp_txqp = &txq->dp;
return 0;
fail_alloc_tsoh_objs:
rte_free(txq->sw_ring);
fail_sw_ring_alloc:
rte_free(txq->pend_desc);
fail_pend_desc_alloc:
rte_free(txq);
fail_txq_alloc:
return rc;
}
static sfc_dp_tx_qdestroy_t sfc_efx_tx_qdestroy;
static void
sfc_efx_tx_qdestroy(struct sfc_dp_txq *dp_txq)
{
struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
sfc_efx_tso_free_tsoh_objs(txq->sw_ring, txq->ptr_mask + 1);
rte_free(txq->sw_ring);
rte_free(txq->pend_desc);
rte_free(txq);
}
static sfc_dp_tx_qstart_t sfc_efx_tx_qstart;
static int
sfc_efx_tx_qstart(struct sfc_dp_txq *dp_txq,
__rte_unused unsigned int evq_read_ptr,
unsigned int txq_desc_index)
{
/* libefx-based datapath is specific to libefx-based PMD */
struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
struct sfc_txq *ctrl_txq = sfc_txq_by_dp_txq(dp_txq);
txq->common = ctrl_txq->common;
txq->pending = txq->completed = txq->added = txq_desc_index;
txq->hw_vlan_tci = 0;
txq->flags |= (SFC_EFX_TXQ_FLAG_STARTED | SFC_EFX_TXQ_FLAG_RUNNING);
return 0;
}
static sfc_dp_tx_qstop_t sfc_efx_tx_qstop;
static void
sfc_efx_tx_qstop(struct sfc_dp_txq *dp_txq,
__rte_unused unsigned int *evq_read_ptr)
{
struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
txq->flags &= ~SFC_EFX_TXQ_FLAG_RUNNING;
}
static sfc_dp_tx_qreap_t sfc_efx_tx_qreap;
static void
sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq)
{
struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
unsigned int txds;
sfc_efx_tx_reap(txq);
for (txds = 0; txds <= txq->ptr_mask; txds++) {
if (txq->sw_ring[txds].mbuf != NULL) {
rte_pktmbuf_free(txq->sw_ring[txds].mbuf);
txq->sw_ring[txds].mbuf = NULL;
}
}
txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED;
}
static sfc_dp_tx_qdesc_status_t sfc_efx_tx_qdesc_status;
static int
sfc_efx_tx_qdesc_status(struct sfc_dp_txq *dp_txq, uint16_t offset)
{
struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
if (unlikely(offset > txq->ptr_mask))
return -EINVAL;
if (unlikely(offset >= txq->max_fill_level))
return RTE_ETH_TX_DESC_UNAVAIL;
/*
* Poll the EvQ to derive an up-to-date 'txq->pending' figure;
* the queue is required to be running, but the check is
* omitted because the API design assumes that it is the
* caller's duty to satisfy all conditions
*/
SFC_ASSERT((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) ==
SFC_EFX_TXQ_FLAG_RUNNING);
sfc_ev_qpoll(txq->evq);
/*
* Ring tail is 'txq->pending', and although descriptors
* between 'txq->completed' and 'txq->pending' are still
* in use by the driver, they should be reported as DONE
*/
if (unlikely(offset < (txq->added - txq->pending)))
return RTE_ETH_TX_DESC_FULL;
/*
* There is no separate return value for unused descriptors;
* the latter will be reported as DONE because genuine DONE
* descriptors will be freed anyway in SW on the next burst
*/
return RTE_ETH_TX_DESC_DONE;
}
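/* Tx datapath definition for the libefx-based (EFX) implementation */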
struct sfc_dp_tx sfc_efx_tx = {
.dp = {
.name = SFC_KVARG_DATAPATH_EFX,
.type = SFC_DP_TX,
.hw_fw_caps = 0,
},
.features = SFC_DP_TX_FEAT_VLAN_INSERT |
SFC_DP_TX_FEAT_TSO |
SFC_DP_TX_FEAT_MULTI_POOL |
SFC_DP_TX_FEAT_REFCNT |
SFC_DP_TX_FEAT_MULTI_SEG,
.qsize_up_rings = sfc_efx_tx_qsize_up_rings,
.qcreate = sfc_efx_tx_qcreate,
.qdestroy = sfc_efx_tx_qdestroy,
.qstart = sfc_efx_tx_qstart,
.qstop = sfc_efx_tx_qstop,
.qreap = sfc_efx_tx_qreap,
.qdesc_status = sfc_efx_tx_qdesc_status,
.pkt_burst = sfc_efx_xmit_pkts,
};