iflib: Better control over queue core assignment
By default, cores are now assigned to queues in a sequential manner rather
than all NICs starting at the first core.  On a four-core system with two
NICs each using two queue pairs, the nic:queue -> core mapping has changed
from this:

0:0 -> 0, 0:1 -> 1
1:0 -> 0, 1:1 -> 1

To this:

0:0 -> 0, 0:1 -> 1
1:0 -> 2, 1:1 -> 3

Additionally, a device can now be configured to use separate cores for TX
and RX queues.

Two new tunables have been added, dev.X.Y.iflib.separate_txrx and
dev.X.Y.iflib.core_offset.  If core_offset is set, the NIC is not part of
the auto-assigned sequence.

Reviewed by:	marius
MFC after:	2 weeks
Sponsored by:	Limelight Networks
Differential Revision:	https://reviews.freebsd.org/D20029
parent f0baac9e32
commit f154ece02e

Notes (svn2git, 2020-12-20 02:59:44 +00:00):
    svn path=/head/; revision=346708
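As a quick sketch of how the new tunables might be used (hypothetical: the
"ix" driver name and unit numbers below are placeholders for any
iflib-backed NIC standing in for dev.X.Y), both are loader tunables and can
be set from /boot/loader.conf:

# Hypothetical /boot/loader.conf entries; adjust driver/unit to taste.
dev.ix.0.iflib.core_offset="4"      # start ix0's queues at core 4, outside the auto-assigned sequence
dev.ix.1.iflib.separate_txrx="1"    # place ix1's TX queues on their own cores, after its RX queues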
share/man/man4/iflib.4
@@ -55,6 +55,16 @@ If zero, the number of TX queues is derived from the number of cores on the
 socket connected to the controller.
 .It Va disable_msix
 Disables MSI-X interrupts for the device.
+.It Va core_offset
+Specifies a starting core offset to assign queues to.
+If the value is unspecified or 65535, cores are assigned sequentially across
+controllers.
+.It Va separate_txrx
+Requests that RX and TX queues not be paired on the same core.
+If this is zero or not set, an RX and TX queue pair will be assigned to each
+core.
+When set to a non-zero value, TX queues are assigned to cores following the
+last RX queue.
 .El
 .Pp
 These
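To make the separate_txrx behavior concrete (an illustrative example, not
taken from the commit: assume one NIC with two queue pairs on a four-core
system whose CPU set covers all four cores), the nic:queue -> core mapping
would be RX 0:0 -> 0, 0:1 -> 1 and TX 0:0 -> 2, 0:1 -> 3, since TX queues
are placed on the cores following the last RX queue.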
sys/net/iflib.c (102 changed lines)
@@ -189,6 +189,9 @@ struct iflib_ctx {
 	uint16_t ifc_sysctl_qs_eq_override;
 	uint16_t ifc_sysctl_rx_budget;
 	uint16_t ifc_sysctl_tx_abdicate;
+	uint16_t ifc_sysctl_core_offset;
+#define	CORE_OFFSET_UNSPECIFIED	0xffff
+	uint8_t  ifc_sysctl_separate_txrx;
 
 	qidx_t ifc_sysctl_ntxds[8];
 	qidx_t ifc_sysctl_nrxds[8];
@@ -723,6 +726,18 @@ static void iflib_free_intr_mem(if_ctx_t ctx);
 static struct mbuf * iflib_fixup_rx(struct mbuf *m);
 #endif
 
+static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets =
+    SLIST_HEAD_INITIALIZER(cpu_offsets);
+struct cpu_offset {
+	SLIST_ENTRY(cpu_offset) entries;
+	cpuset_t	set;
+	unsigned int	refcount;
+	uint16_t	offset;
+};
+static struct mtx cpu_offset_mtx;
+MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock",
+    MTX_DEF);
+
 NETDUMP_DEFINE(iflib);
 
 #ifdef DEV_NETMAP
@@ -4461,6 +4476,71 @@ iflib_rem_pfil(if_ctx_t ctx)
 	pfil_head_unregister(pfil);
 }
 
+static uint16_t
+get_ctx_core_offset(if_ctx_t ctx)
+{
+	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
+	struct cpu_offset *op;
+	uint16_t qc;
+	uint16_t ret = ctx->ifc_sysctl_core_offset;
+
+	if (ret != CORE_OFFSET_UNSPECIFIED)
+		return (ret);
+
+	if (ctx->ifc_sysctl_separate_txrx)
+		qc = scctx->isc_ntxqsets + scctx->isc_nrxqsets;
+	else
+		qc = max(scctx->isc_ntxqsets, scctx->isc_nrxqsets);
+
+	mtx_lock(&cpu_offset_mtx);
+	SLIST_FOREACH(op, &cpu_offsets, entries) {
+		if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) {
+			ret = op->offset;
+			op->offset += qc;
+			MPASS(op->refcount < UINT_MAX);
+			op->refcount++;
+			break;
+		}
+	}
+	if (ret == CORE_OFFSET_UNSPECIFIED) {
+		ret = 0;
+		op = malloc(sizeof(struct cpu_offset), M_IFLIB,
+		    M_NOWAIT | M_ZERO);
+		if (op == NULL) {
+			device_printf(ctx->ifc_dev,
+			    "allocation for cpu offset failed.\n");
+		} else {
+			op->offset = qc;
+			op->refcount = 1;
+			CPU_COPY(&ctx->ifc_cpus, &op->set);
+			SLIST_INSERT_HEAD(&cpu_offsets, op, entries);
+		}
+	}
+	mtx_unlock(&cpu_offset_mtx);
+
+	return (ret);
+}
+
+static void
+unref_ctx_core_offset(if_ctx_t ctx)
+{
+	struct cpu_offset *op, *top;
+
+	mtx_lock(&cpu_offset_mtx);
+	SLIST_FOREACH_SAFE(op, &cpu_offsets, entries, top) {
+		if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) {
+			MPASS(op->refcount > 0);
+			op->refcount--;
+			if (op->refcount == 0) {
+				SLIST_REMOVE(&cpu_offsets, op, cpu_offset, entries);
+				free(op, M_IFLIB);
+			}
+			break;
+		}
+	}
+	mtx_unlock(&cpu_offset_mtx);
+}
+
 int
 iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp)
 {
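The two functions above keep a refcounted running offset per distinct CPU
set: each context sharing a set consumes qc cores (the larger of its TX and
RX queue-set counts, or their sum under separate_txrx) and releases its
reference on deregister.  A minimal userland sketch of that accounting (an
admitted simplification: one global offset instead of the per-cpuset list,
and no locking) reproduces the commit-message example:

#include <stdio.h>

/* Stands in for a single cpu_offset entry; one shared CPU set assumed. */
static unsigned int next_offset;

static unsigned int
alloc_offset(unsigned int ntxqsets, unsigned int nrxqsets, int separate_txrx)
{
	unsigned int qc, ret;

	/* Same sizing rule as get_ctx_core_offset(). */
	qc = separate_txrx ? ntxqsets + nrxqsets :
	    (ntxqsets > nrxqsets ? ntxqsets : nrxqsets);
	ret = next_offset;
	next_offset += qc;
	return (ret);
}

int
main(void)
{
	/* Two NICs, two queue pairs each, TX/RX paired. */
	printf("nic0 offset: %u\n", alloc_offset(2, 2, 0));	/* prints 0 */
	printf("nic1 offset: %u\n", alloc_offset(2, 2, 0));	/* prints 2 */
	return (0);
}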
@@ -4612,6 +4692,11 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
 	if ((err = iflib_qset_structures_setup(ctx)))
 		goto fail_queues;
 
+	/*
+	 * Now that we know how many queues there are, get the core offset.
+	 */
+	ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx);
+
 	/*
 	 * Group taskqueues aren't properly set up until SMP is started,
 	 * so we disable interrupts until we can handle them post
@@ -5037,6 +5122,7 @@ iflib_device_deregister(if_ctx_t ctx)
 	iflib_rx_structures_free(ctx);
 	if (ctx->ifc_flags & IFC_SC_ALLOCATED)
 		free(ctx->ifc_softc, M_IFLIB);
+	unref_ctx_core_offset(ctx);
 	STATE_LOCK_DESTROY(ctx);
 	free(ctx, M_IFLIB);
 	return (0);
@@ -5655,7 +5741,7 @@ find_child_with_core(int cpu, struct cpu_group *grp)
  * Find the nth "close" core to the specified core
  * "close" is defined as the deepest level that shares
  * at least an L2 cache.  With threads, this will be
- * threads on the same core.  If the sahred cache is L3
+ * threads on the same core.  If the shared cache is L3
  * or higher, simply returns the same core.
  */
 static int
@@ -5739,10 +5825,13 @@ iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type,
     const char *name)
 {
 	device_t dev;
-	int err, cpuid, tid;
+	int co, cpuid, err, tid;
 
 	dev = ctx->ifc_dev;
-	cpuid = find_nth(ctx, qid);
+	co = ctx->ifc_sysctl_core_offset;
+	if (ctx->ifc_sysctl_separate_txrx && type == IFLIB_INTR_TX)
+		co += ctx->ifc_softc_ctx.isc_nrxqsets;
+	cpuid = find_nth(ctx, qid + co);
 	tid = get_core_offset(ctx, type, qid);
 	MPASS(tid >= 0);
 	cpuid = find_close_core(cpuid, tid);
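Tracing the new affinity math under assumed values (a worked example, not
from the commit): with core_offset 2, isc_nrxqsets 2, and separate_txrx
set, TX queue 1 resolves to find_nth(ctx, 1 + 2 + 2), the sixth core in
the device's CPU set (index 5), while RX queue 1 stays at
find_nth(ctx, 1 + 2) (index 3).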
@@ -6344,6 +6433,13 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx)
 	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate",
 	    CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0,
 	    "cause tx to abdicate instead of running to completion");
+	ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED;
+	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset",
+	    CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0,
+	    "offset to start using cores at");
+	SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx",
+	    CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0,
+	    "use separate cores for TX and RX");
 
 	/* XXX change for per-queue sizes */
 	SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
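One usage note on the sysctls added above: because iflib_device_register()
later overwrites ifc_sysctl_core_offset with the value returned by
get_ctx_core_offset(), reading the read-only tunable after attach shows the
offset the device actually received, e.g. (hypothetical device name):

# sysctl dev.ix.1.iflib.core_offset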
|
Loading…
Reference in New Issue
Block a user