Add knob to control tx ring abdication.

r323954 changed the mp ring behaviour when 64-bit atomics were
available: rather than having one enqueuing thread become the consumer
and run to completion on TX, the TX ring was abdicated. The consumer of
the mp ring was then triggered in the tx task rather than blocking the
TX call. While this significantly lowered the number of RX drops in
small-packet forwarding, it also negatively impacted TX performance.

With this change, the default behaviour is reverted, causing one
enqueuing thread to become a consumer during the enqueue call. A new
sysctl, dev.X.Y.iflib.tx_abdicate, is added to control this behaviour.

Reviewed by:	gallatin
Sponsored by:	Limelight Networks
Differential Revision:	https://reviews.freebsd.org/D16302
commit 06b406febd
parent 4db9126b14
Author: shurd
Date:   2018-07-20 17:45:26 +00:00

3 changed files with 54 additions and 16 deletions
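
For background on what the knob selects between, the policy difference is
easiest to see in a standalone toy model. The program below is an
illustrative sketch only: toy_ring, toy_enqueue and toy_check_drainage are
invented names, the ring is reduced to a counter, and the real mp_ring packs
its state into a single 64-bit word updated atomically from multiple threads.

#include <stdio.h>

enum ring_flags { IDLE, BUSY, STALLED, ABDICATED };

struct toy_ring {
	enum ring_flags flags;
	int pending;		/* items enqueued but not yet drained */
};

static void
drain(struct toy_ring *r, const char *who)
{
	printf("%s drains %d item(s)\n", who, r->pending);
	r->pending = 0;
	r->flags = IDLE;
}

/* Enqueue one item; 'abdicate' selects between the two policies. */
static void
toy_enqueue(struct toy_ring *r, int abdicate)
{
	enum ring_flags old = r->flags;

	r->pending++;
	if (abdicate) {
		/* Leave the work for the tx task to pick up. */
		if (old == IDLE)
			r->flags = ABDICATED;
	} else {
		/* Become the consumer and run to completion. */
		r->flags = BUSY;
		if (old != BUSY)
			drain(r, "enqueuer");
	}
}

/* What the tx task's drainage check boils down to. */
static void
toy_check_drainage(struct toy_ring *r)
{
	if (r->flags == ABDICATED && r->pending > 0)
		drain(r, "tx task");
}

int
main(void)
{
	struct toy_ring r = { IDLE, 0 };

	toy_enqueue(&r, 0);	/* tx_abdicate=0: the enqueuer drains */
	toy_enqueue(&r, 1);	/* tx_abdicate=1: the tx task drains */
	toy_check_drainage(&r);
	return (0);
}

Run-to-completion keeps the enqueuing thread doing the TX work, which helps
TX throughput; abdication returns from the transmit call sooner, freeing the
caller to service RX, which is why it reduced RX drops in small-packet
forwarding.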

sys/net/iflib.c

@@ -197,6 +197,7 @@ struct iflib_ctx {
 	uint16_t ifc_sysctl_nrxqs;
 	uint16_t ifc_sysctl_qs_eq_override;
 	uint16_t ifc_sysctl_rx_budget;
+	uint16_t ifc_sysctl_tx_abdicate;
 	qidx_t ifc_sysctl_ntxds[8];
 	qidx_t ifc_sysctl_nrxds[8];
@@ -3756,6 +3757,7 @@ _task_fn_tx(void *context)
 	iflib_txq_t txq = context;
 	if_ctx_t ctx = txq->ift_ctx;
 	struct ifnet *ifp = ctx->ifc_ifp;
+	int abdicate = ctx->ifc_sysctl_tx_abdicate;

 #ifdef IFLIB_DIAGNOSTICS
 	txq->ift_cpu_exec_count[curcpu]++;
@@ -3769,7 +3771,14 @@ _task_fn_tx(void *context)
 		return;
 	}
 	if (txq->ift_db_pending)
-		ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE);
-	ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
+		ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE, abdicate);
+	else if (!abdicate)
+		ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
+
+	/*
+	 * When abdicating, we always need to check drainage, not just when we don't enqueue
+	 */
+	if (abdicate)
+		ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
 	if (ctx->ifc_flags & IFC_LEGACY)
 		IFDI_INTR_ENABLE(ctx);
@@ -3940,6 +3949,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m)
 	iflib_txq_t txq;
 	int err, qidx;
+	int abdicate = ctx->ifc_sysctl_tx_abdicate;

 	if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) {
 		DBG_COUNTER_INC(tx_frees);
@@ -3991,10 +4001,13 @@ iflib_if_transmit(if_t ifp, struct mbuf *m)
 	}
 #endif
 	DBG_COUNTER_INC(tx_seen);
-	err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE);
-	GROUPTASK_ENQUEUE(&txq->ift_task);
+	err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE, abdicate);
+	if (abdicate)
+		GROUPTASK_ENQUEUE(&txq->ift_task);
 	if (err) {
+		if (!abdicate)
+			GROUPTASK_ENQUEUE(&txq->ift_task);
 		/* support forthcoming later */
 #ifdef DRIVER_BACKPRESSURE
 		txq->ift_closed = TRUE;
@@ -6200,6 +6213,9 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx)
 	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget",
 	    CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0,
 	    "set the rx budget");
+	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate",
+	    CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0,
+	    "cause tx to abdicate instead of running to completion");
 	/* XXX change for per-queue sizes */
 	SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
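
Because the knob is created with CTLFLAG_RWTUN, it can be changed at runtime
or preset as a loader tunable. For example, on unit 0 of an iflib-backed
em(4) interface (the device name here is illustrative):

	# enable abdication at runtime; 0 (run to completion) is the default
	sysctl dev.em.0.iflib.tx_abdicate=1

	# or persistently, via /boot/loader.conf
	dev.em.0.iflib.tx_abdicate="1"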

sys/net/mp_ring.c

@@ -327,7 +327,7 @@ ifmp_ring_free(struct ifmp_ring *r)
  */
 #ifdef NO_64BIT_ATOMICS
 int
-ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget)
+ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget, int abdicate)
 {
 	union ring_state os, ns;
 	uint16_t pidx_start, pidx_stop;
@@ -380,16 +380,24 @@ ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget)
 	 */
 	os.state = ns.state = r->state;
 	ns.pidx_tail = pidx_stop;
-	ns.flags = BUSY;
+	if (abdicate) {
+		if (os.flags == IDLE)
+			ns.flags = ABDICATED;
+	}
+	else {
+		ns.flags = BUSY;
+	}
 	r->state = ns.state;
 	counter_u64_add(r->enqueues, n);

-	/*
-	 * Turn into a consumer if some other thread isn't active as a consumer
-	 * already.
-	 */
-	if (os.flags != BUSY)
-		drain_ring_locked(r, ns, os.flags, budget);
+	if (!abdicate) {
+		/*
+		 * Turn into a consumer if some other thread isn't active as a consumer
+		 * already.
+		 */
+		if (os.flags != BUSY)
+			drain_ring_locked(r, ns, os.flags, budget);
+	}

 	mtx_unlock(&r->lock);
 	return (0);
@@ -397,7 +405,7 @@ ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget)
 }
 #else
 int
-ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget)
+ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget, int abdicate)
 {
 	union ring_state os, ns;
 	uint16_t pidx_start, pidx_stop;
@@ -455,12 +463,26 @@ ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget)
 	do {
 		os.state = ns.state = r->state;
 		ns.pidx_tail = pidx_stop;
-		if (os.flags == IDLE)
-			ns.flags = ABDICATED;
+		if (abdicate) {
+			if (os.flags == IDLE)
+				ns.flags = ABDICATED;
+		}
+		else {
+			ns.flags = BUSY;
+		}
 	} while (atomic_cmpset_rel_64(&r->state, os.state, ns.state) == 0);
 	critical_exit();
 	counter_u64_add(r->enqueues, n);

+	if (!abdicate) {
+		/*
+		 * Turn into a consumer if some other thread isn't active as a consumer
+		 * already.
+		 */
+		if (os.flags != BUSY)
+			drain_ring_lockless(r, ns, os.flags, budget);
+	}

 	return (0);
 }
 #endif
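
Both implementations exist because the lockless path relies on folding the
entire ring state (producer head and tail, consumer index, and flags) into
one 64-bit word, so atomic_cmpset_rel_64() can publish the new pidx_tail and
the BUSY/ABDICATED flag in a single compare-and-set; platforms without
64-bit atomics use the mutex-protected variant above instead. Roughly (field
names as in mp_ring.c; treat the exact layout as an approximation):

	union ring_state {
		uint64_t state;
		struct {
			uint16_t pidx_head;	/* next slot a producer reserves */
			uint16_t pidx_tail;	/* last slot published to consumers */
			uint16_t cidx;		/* consumer index */
			uint16_t flags;		/* IDLE, BUSY, STALLED, ABDICATED */
		};
	};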

sys/net/mp_ring.h

@@ -63,7 +63,7 @@ struct ifmp_ring {
 int ifmp_ring_alloc(struct ifmp_ring **, int, void *, mp_ring_drain_t,
     mp_ring_can_drain_t, struct malloc_type *, int);
 void ifmp_ring_free(struct ifmp_ring *);
-int ifmp_ring_enqueue(struct ifmp_ring *, void **, int, int);
+int ifmp_ring_enqueue(struct ifmp_ring *, void **, int, int, int);
 void ifmp_ring_check_drainage(struct ifmp_ring *, int);
 void ifmp_ring_reset_stats(struct ifmp_ring *);
 int ifmp_ring_is_idle(struct ifmp_ring *);