iflib updates and fixes:

-    reset gen on down
-    initialize admin task statically
-    drain mp_ring on down
-    don't drop context lock on stop
-    reset error stats on down
-    fix typo in the min_tx_latency sysctl description
-    return ENOBUFS from if_transmit if the driver isn't running or the link is down

Submitted by:	mmacy@nextbsd.org
Reviewed by:	shurd
MFC after:	2 days
Sponsored by:	Isilon and Limelight Networks
Differential Revision:	https://reviews.freebsd.org/D8558
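
The change that most benefits from an illustration is draining the mp_ring on down, which the patch implements by temporarily swapping the ring's drain callbacks (see iflib_ifmp_purge() in the diff below). The following standalone C program is a toy sketch of that pattern only; it is not iflib code, and every name in it (toy_ring, toy_drain_free, toy_ring_purge, and so on) is hypothetical.

/* toy_purge.c -- standalone illustration; all names are hypothetical. */
#include <stdio.h>
#include <stdlib.h>

struct toy_ring {
	void	**items;			/* queued packet buffers */
	int	  count;			/* number of queued buffers */
	int	(*drain)(struct toy_ring *);	/* consumes queued buffers */
};

static int
toy_drain_transmit(struct toy_ring *r)
{
	/* Normal path: pretend to hand the buffers to hardware. */
	int n = r->count;

	printf("transmitting %d buffer(s)\n", n);
	r->count = 0;
	return (n);
}

static int
toy_drain_free(struct toy_ring *r)
{
	/* Purge path: the interface is going down, so just free what is queued. */
	int n = r->count;
	int i;

	for (i = 0; i < n; i++)
		free(r->items[i]);
	r->count = 0;
	return (n);
}

static void
toy_ring_purge(struct toy_ring *r)
{
	/* Swap in the freeing drain routine, drain, then restore. */
	int (*saved)(struct toy_ring *) = r->drain;

	r->drain = toy_drain_free;
	r->drain(r);
	r->drain = saved;
}

int
main(void)
{
	void *bufs[4];
	struct toy_ring r = { bufs, 0, toy_drain_transmit };
	int i;

	for (i = 0; i < 4; i++)
		r.items[r.count++] = malloc(64);	/* pretend packets */
	toy_ring_purge(&r);	/* the interface just went down */
	printf("%d buffer(s) left after purge\n", r.count);
	return (0);
}

In iflib itself the swap happens on txq->ift_br[0]: iflib_ifmp_purge() installs iflib_txq_drain_free and iflib_txq_drain_always, calls ifmp_ring_check_drainage(), and then restores iflib_txq_drain and iflib_txq_can_drain.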
sbruno 2016-11-18 04:19:21 +00:00
parent 98ec779db4
commit 265e1db5f7

@@ -93,7 +93,6 @@ __FBSDID("$FreeBSD$");
#include <x86/iommu/busdma_dmar.h>
#endif
/*
* enable accounting of every mbuf as it comes in to and goes out of iflib's software descriptor references
*/
@@ -500,7 +499,7 @@ static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0,
static int iflib_min_tx_latency = 0;
SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW,
&iflib_min_tx_latency, 0, "minimize transmit latency at the possibel expense of throughput");
&iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput");
#if IFLIB_DEBUG_COUNTERS
@@ -595,10 +594,23 @@ SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW,
&iflib_verbose_debug, 0, "enable verbose debugging");
#define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1)
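/*
 * Zero all of the debug counters above; called from iflib_stop() so the
 * counts start fresh the next time the interface is brought up.
 */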
static void
iflib_debug_reset(void)
{
	iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs =
	    iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees =
	    iflib_txq_drain_flushing = iflib_txq_drain_oactive =
	    iflib_txq_drain_notready = iflib_txq_drain_encapfail =
	    iflib_encap_load_mbuf_fail = iflib_encap_txq_avail_fail =
	    iflib_encap_txd_encap_fail = iflib_task_fn_rxs = iflib_rx_intr_enables =
	    iflib_fast_intrs = iflib_intr_link = iflib_intr_msix = iflib_rx_unavail =
	    iflib_rx_ctx_inactive = iflib_rx_zero_len = iflib_rx_if_input =
	    iflib_rx_mbuf_null = iflib_rxd_flush = 0;
}

#else
#define DBG_COUNTER_INC(name)
static void iflib_debug_reset(void) {}
#endif
@@ -619,6 +631,7 @@ static int iflib_register(if_ctx_t);
static void iflib_init_locked(if_ctx_t ctx);
static void iflib_add_device_sysctl_pre(if_ctx_t ctx);
static void iflib_add_device_sysctl_post(if_ctx_t ctx);
static void iflib_ifmp_purge(iflib_txq_t txq);
#ifdef DEV_NETMAP
@@ -1266,11 +1279,6 @@ iflib_txsd_alloc(iflib_txq_t txq)
sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize);
goto fail;
}
#ifdef IFLIB_DIAGNOSTICS
device_printf(dev,"maxsize: %zd nsegments: %d maxsegsize: %zd\n",
sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize);
#endif
if ((err = bus_dma_tag_create(bus_get_dma_tag(dev),
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
@@ -1287,11 +1295,6 @@ iflib_txsd_alloc(iflib_txq_t txq)
goto fail;
}
#ifdef IFLIB_DIAGNOSTICS
device_printf(dev,"TSO maxsize: %d ntsosegments: %d maxsegsize: %d\n",
scctx->isc_tx_tso_size_max, ntsosegments,
scctx->isc_tx_tso_segsize_max);
#endif
if (!(txq->ift_sds.ifsd_flags =
(uint8_t *) malloc(sizeof(uint8_t) *
scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
@@ -1921,20 +1924,23 @@ iflib_stop(if_ctx_t ctx)
if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
IFDI_INTR_DISABLE(ctx);
msleep(ctx, &ctx->ifc_mtx, PUSER, "iflib_init", hz);
DELAY(100000);
IFDI_STOP(ctx);
DELAY(100000);
iflib_debug_reset();
/* Wait for current tx queue users to exit to disarm watchdog timer. */
for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) {
/* make sure all transmitters have completed before proceeding XXX */
/* clean any enqueued buffers */
iflib_txq_check_drain(txq, 0);
iflib_ifmp_purge(txq);
/* Free any existing tx buffers. */
for (j = 0; j < txq->ift_size; j++) {
iflib_txsd_free(ctx, txq, j);
}
txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0;
txq->ift_in_use = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0;
txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0;
txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0;
txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0;
txq->ift_pullups = 0;
@@ -1951,7 +1957,6 @@ iflib_stop(if_ctx_t ctx)
for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
iflib_fl_bufs_free(fl);
}
IFDI_STOP(ctx);
}
static iflib_rxsd_t
@@ -2890,22 +2895,27 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
}
consumed = mcast_sent = bytes_sent = pkt_sent = 0;
count = MIN(avail, TX_BATCH_SIZE);
#ifdef INVARIANTS
if (iflib_verbose_debug)
printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__,
avail, ctx->ifc_flags, TXQ_AVAIL(txq));
#endif
for (desc_used = i = 0; i < count && TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2; i++) {
mp = _ring_peek_one(r, cidx, i);
MPASS(mp != NULL && *mp != NULL);
in_use_prev = txq->ift_in_use;
err = iflib_encap(txq, mp);
/*
* What other errors should we bail out for?
*/
if (err == ENOBUFS) {
if ((err = iflib_encap(txq, mp)) == ENOBUFS) {
DBG_COUNTER_INC(txq_drain_encapfail);
/* no room - bail out */
break;
}
consumed++;
if (err)
if (err) {
DBG_COUNTER_INC(txq_drain_encapfail);
/* we can't send this packet - skip it */
continue;
}
pkt_sent++;
m = *mp;
DBG_COUNTER_INC(tx_sent);
@@ -2936,10 +2946,58 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent);
if (mcast_sent)
if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent);
#ifdef INVARIANTS
if (iflib_verbose_debug)
printf("consumed=%d\n", consumed);
#endif
return (consumed);
}
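/* can_drain handler used while purging: report that the ring can always be drained. */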
static uint32_t
iflib_txq_drain_always(struct ifmp_ring *r)
{
	return (1);
}
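/*
 * drain handler installed during a purge: stop the queue timers and free
 * every mbuf still sitting in the ring rather than transmitting it.
 */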
static uint32_t
iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
{
	int i, avail;
	struct mbuf **mp;
	iflib_txq_t txq;

	txq = r->cookie;
	txq->ift_qstatus = IFLIB_QUEUE_IDLE;

	CALLOUT_LOCK(txq);
	callout_stop(&txq->ift_timer);
	callout_stop(&txq->ift_db_check);
	CALLOUT_UNLOCK(txq);

	avail = IDXDIFF(pidx, cidx, r->size);
	for (i = 0; i < avail; i++) {
		mp = _ring_peek_one(r, cidx, i);
		m_freem(*mp);
	}
	MPASS(ifmp_ring_is_stalled(r) == 0);
	return (avail);
}
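/*
 * Called from iflib_stop(): temporarily swap in the freeing drain handlers,
 * force the mp_ring to drain (discarding anything still queued), then restore
 * the normal transmit handlers.
 */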
static void
iflib_ifmp_purge(iflib_txq_t txq)
{
	struct ifmp_ring *r;

	r = txq->ift_br[0];
	r->drain = iflib_txq_drain_free;
	r->can_drain = iflib_txq_drain_always;

	ifmp_ring_check_drainage(r, r->size);

	r->drain = iflib_txq_drain;
	r->can_drain = iflib_txq_can_drain;
}
static void
_task_fn_tx(void *context)
{
@@ -3073,7 +3131,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m)
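/*
 * If the interface is not running or the link is down, the mbuf is freed
 * and the caller now gets ENOBUFS instead of a silent "success".
 */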
if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) {
DBG_COUNTER_INC(tx_frees);
m_freem(m);
return (0);
return (ENOBUFS);
}
MPASS(m->m_nextpkt == NULL);
@@ -3590,6 +3648,10 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
if (scctx->isc_rss_table_size == 0)
scctx->isc_rss_table_size = 64;
scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;
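/*
 * Set up and attach the admin task once at register time ("initialize
 * admin task statically" above) rather than during interrupt allocation.
 */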
GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx);
/* XXX format name */
taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin");
/*
** Now setup MSI or MSI/X, should
** return us the number of supported
@@ -4342,6 +4404,7 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
gtask = &ctx->ifc_txqs[qid].ift_task;
tqg = qgroup_if_io_tqg;
fn = _task_fn_tx;
GROUPTASK_INIT(gtask, 0, fn, q);
break;
case IFLIB_INTR_RX:
q = &ctx->ifc_rxqs[qid];
@@ -4349,9 +4412,11 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
gtask = &ctx->ifc_rxqs[qid].ifr_task;
tqg = qgroup_if_io_tqg;
fn = _task_fn_rx;
GROUPTASK_INIT(gtask, 0, fn, q);
break;
case IFLIB_INTR_ADMIN:
q = ctx;
tqrid = -1;
info = &ctx->ifc_filter_info;
gtask = &ctx->ifc_admin_task;
tqg = qgroup_if_config_tqg;
@@ -4360,17 +4425,19 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
default:
panic("unknown net intr type");
}
GROUPTASK_INIT(gtask, 0, fn, q);
info->ifi_filter = filter;
info->ifi_filter_arg = filter_arg;
info->ifi_task = gtask;
/* XXX query cpu that rid belongs to */
err = _iflib_irq_alloc(ctx, irq, rid, iflib_fast_intr, NULL, info, name);
if (err != 0)
if (err != 0) {
device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err);
return (err);
}
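/* The admin task was already attached to its taskqgroup at register time. */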
if (type == IFLIB_INTR_ADMIN)
return (0);
if (tqrid != -1) {
cpuid = find_nth(ctx, &cpus, qid);
taskqgroup_attach_cpu(tqg, gtask, q, cpuid, irq->ii_rid, name);
@@ -4402,13 +4469,6 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void
tqg = qgroup_if_io_tqg;
fn = _task_fn_rx;
break;
case IFLIB_INTR_ADMIN:
q = ctx;
gtask = &ctx->ifc_admin_task;
tqg = qgroup_if_config_tqg;
rid = -1;
fn = _task_fn_admin;
break;
case IFLIB_INTR_IOV:
q = ctx;
gtask = &ctx->ifc_vflr_task;
@@ -4965,7 +5025,6 @@ iflib_add_device_sysctl_post(if_ctx_t ctx)
SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications",
CTLFLAG_RD, &txq->ift_br[0]->abdications,
"# of consumer abdications in the mp_ring for this queue");
}
if (scctx->isc_nrxqsets > 100)
@@ -4987,6 +5046,7 @@ iflib_add_device_sysctl_post(if_ctx_t ctx)
CTLFLAG_RD,
&rxq->ifr_cq_cidx, 1, "Consumer Index");
}
for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j);
fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf,