iflib: Better control over queue core assignment
By default, cores are now assigned to queues in a sequential manner rather
than all NICs starting at the first core.  On a four-core system with two
NICs each using two queue pairs, the nic:queue -> core mapping has changed
from this:

0:0 -> 0, 0:1 -> 1
1:0 -> 0, 1:1 -> 1

To this:

0:0 -> 0, 0:1 -> 1
1:0 -> 2, 1:1 -> 3

Additionally, a device can now be configured to use separate cores for TX
and RX queues.

Two new tunables have been added, dev.X.Y.iflib.separate_txrx and
dev.X.Y.iflib.core_offset.  If core_offset is set, the NIC is not part of
the auto-assigned sequence.

Reviewed by:	marius
MFC after:	2 weeks
Sponsored by:	Limelight Networks
Differential Revision:	https://reviews.freebsd.org/D20029
parent f0baac9e32
commit f154ece02e

Notes (svn2git, 2020-12-20 02:59:44 +00:00):
    svn path=/head/; revision=346708
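As a quick sketch of how the new tunables might be used (hypothetical: the
"ix" driver name and unit numbers below are placeholders for any
iflib-backed NIC standing in for dev.X.Y), both are loader tunables and can
be set from /boot/loader.conf:

# Hypothetical /boot/loader.conf entries; adjust driver/unit to taste.
dev.ix.0.iflib.core_offset="4"      # start ix0's queues at core 4, outside the auto-assigned sequence
dev.ix.1.iflib.separate_txrx="1"    # place ix1's TX queues on their own cores, after its RX queues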
share/man/man4/iflib.4
@@ -55,6 +55,16 @@ If zero, the number of TX queues is derived from the number of cores on the
 socket connected to the controller.
 .It Va disable_msix
 Disables MSI-X interrupts for the device.
+.It Va core_offset
+Specifies a starting core offset to assign queues to.
+If the value is unspecified or 65535, cores are assigned sequentially across
+controllers.
+.It Va separate_txrx
+Requests that RX and TX queues not be paired on the same core.
+If this is zero or not set, an RX and TX queue pair will be assigned to each
+core.
+When set to a non-zero value, TX queues are assigned to cores following the
+last RX queue.
 .El
 .Pp
 These
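To make the separate_txrx behavior concrete (an illustrative example, not
taken from the commit: assume one NIC with two queue pairs on a four-core
system whose CPU set covers all four cores), the nic:queue -> core mapping
would be RX 0:0 -> 0, 0:1 -> 1 and TX 0:0 -> 2, 0:1 -> 3, since TX queues
are placed on the cores following the last RX queue.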
sys/net/iflib.c (102 changed lines)
@@ -189,6 +189,9 @@ struct iflib_ctx {
 	uint16_t ifc_sysctl_qs_eq_override;
 	uint16_t ifc_sysctl_rx_budget;
 	uint16_t ifc_sysctl_tx_abdicate;
+	uint16_t ifc_sysctl_core_offset;
+#define	CORE_OFFSET_UNSPECIFIED	0xffff
+	uint8_t  ifc_sysctl_separate_txrx;
 
 	qidx_t ifc_sysctl_ntxds[8];
 	qidx_t ifc_sysctl_nrxds[8];
@@ -723,6 +726,18 @@ static void iflib_free_intr_mem(if_ctx_t ctx);
 static struct mbuf * iflib_fixup_rx(struct mbuf *m);
 #endif
 
+static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets =
+    SLIST_HEAD_INITIALIZER(cpu_offsets);
+struct cpu_offset {
+	SLIST_ENTRY(cpu_offset) entries;
+	cpuset_t	set;
+	unsigned int	refcount;
+	uint16_t	offset;
+};
+static struct mtx cpu_offset_mtx;
+MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock",
+    MTX_DEF);
+
 NETDUMP_DEFINE(iflib);
 
 #ifdef DEV_NETMAP
@@ -4461,6 +4476,71 @@ iflib_rem_pfil(if_ctx_t ctx)
 	pfil_head_unregister(pfil);
 }
 
+static uint16_t
+get_ctx_core_offset(if_ctx_t ctx)
+{
+	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
+	struct cpu_offset *op;
+	uint16_t qc;
+	uint16_t ret = ctx->ifc_sysctl_core_offset;
+
+	if (ret != CORE_OFFSET_UNSPECIFIED)
+		return (ret);
+
+	if (ctx->ifc_sysctl_separate_txrx)
+		qc = scctx->isc_ntxqsets + scctx->isc_nrxqsets;
+	else
+		qc = max(scctx->isc_ntxqsets, scctx->isc_nrxqsets);
+
+	mtx_lock(&cpu_offset_mtx);
+	SLIST_FOREACH(op, &cpu_offsets, entries) {
+		if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) {
+			ret = op->offset;
+			op->offset += qc;
+			MPASS(op->refcount < UINT_MAX);
+			op->refcount++;
+			break;
+		}
+	}
+	if (ret == CORE_OFFSET_UNSPECIFIED) {
+		ret = 0;
+		op = malloc(sizeof(struct cpu_offset), M_IFLIB,
+		    M_NOWAIT | M_ZERO);
+		if (op == NULL) {
+			device_printf(ctx->ifc_dev,
+			    "allocation for cpu offset failed.\n");
+		} else {
+			op->offset = qc;
+			op->refcount = 1;
+			CPU_COPY(&ctx->ifc_cpus, &op->set);
+			SLIST_INSERT_HEAD(&cpu_offsets, op, entries);
+		}
+	}
+	mtx_unlock(&cpu_offset_mtx);
+
+	return (ret);
+}
+
+static void
+unref_ctx_core_offset(if_ctx_t ctx)
+{
+	struct cpu_offset *op, *top;
+
+	mtx_lock(&cpu_offset_mtx);
+	SLIST_FOREACH_SAFE(op, &cpu_offsets, entries, top) {
+		if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) {
+			MPASS(op->refcount > 0);
+			op->refcount--;
+			if (op->refcount == 0) {
+				SLIST_REMOVE(&cpu_offsets, op, cpu_offset, entries);
+				free(op, M_IFLIB);
+			}
+			break;
+		}
+	}
+	mtx_unlock(&cpu_offset_mtx);
+}
+
 int
 iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp)
 {
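The two functions above keep a refcounted running offset per distinct CPU
set: each context sharing a set consumes qc cores (the larger of its TX and
RX queue-set counts, or their sum under separate_txrx) and releases its
reference on deregister.  A minimal userland sketch of that accounting (an
admitted simplification: one global offset instead of the per-cpuset list,
and no locking) reproduces the commit-message example:

#include <stdio.h>

/* Stands in for a single cpu_offset entry; one shared CPU set assumed. */
static unsigned int next_offset;

static unsigned int
alloc_offset(unsigned int ntxqsets, unsigned int nrxqsets, int separate_txrx)
{
	unsigned int qc, ret;

	/* Same sizing rule as get_ctx_core_offset(). */
	qc = separate_txrx ? ntxqsets + nrxqsets :
	    (ntxqsets > nrxqsets ? ntxqsets : nrxqsets);
	ret = next_offset;
	next_offset += qc;
	return (ret);
}

int
main(void)
{
	/* Two NICs, two queue pairs each, TX/RX paired. */
	printf("nic0 offset: %u\n", alloc_offset(2, 2, 0));	/* prints 0 */
	printf("nic1 offset: %u\n", alloc_offset(2, 2, 0));	/* prints 2 */
	return (0);
}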
@@ -4612,6 +4692,11 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
 	if ((err = iflib_qset_structures_setup(ctx)))
 		goto fail_queues;
 
+	/*
+	 * Now that we know how many queues there are, get the core offset.
+	 */
+	ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx);
+
 	/*
 	 * Group taskqueues aren't properly set up until SMP is started,
 	 * so we disable interrupts until we can handle them post
@@ -5037,6 +5122,7 @@ iflib_device_deregister(if_ctx_t ctx)
 	iflib_rx_structures_free(ctx);
 	if (ctx->ifc_flags & IFC_SC_ALLOCATED)
 		free(ctx->ifc_softc, M_IFLIB);
+	unref_ctx_core_offset(ctx);
 	STATE_LOCK_DESTROY(ctx);
 	free(ctx, M_IFLIB);
 	return (0);
@@ -5655,7 +5741,7 @@ find_child_with_core(int cpu, struct cpu_group *grp)
  * Find the nth "close" core to the specified core
  * "close" is defined as the deepest level that shares
  * at least an L2 cache.  With threads, this will be
- * threads on the same core.  If the sahred cache is L3
+ * threads on the same core.  If the shared cache is L3
  * or higher, simply returns the same core.
  */
 static int
@@ -5739,10 +5825,13 @@ iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type,
     const char *name)
 {
 	device_t dev;
-	int err, cpuid, tid;
+	int co, cpuid, err, tid;
 
 	dev = ctx->ifc_dev;
-	cpuid = find_nth(ctx, qid);
+	co = ctx->ifc_sysctl_core_offset;
+	if (ctx->ifc_sysctl_separate_txrx && type == IFLIB_INTR_TX)
+		co += ctx->ifc_softc_ctx.isc_nrxqsets;
+	cpuid = find_nth(ctx, qid + co);
 	tid = get_core_offset(ctx, type, qid);
 	MPASS(tid >= 0);
 	cpuid = find_close_core(cpuid, tid);
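Tracing the new affinity math under assumed values (a worked example, not
from the commit): with core_offset 2, isc_nrxqsets 2, and separate_txrx
set, TX queue 1 resolves to find_nth(ctx, 1 + 2 + 2), the sixth core in
the device's CPU set (index 5), while RX queue 1 stays at
find_nth(ctx, 1 + 2) (index 3).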
@@ -6344,6 +6433,13 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx)
 	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate",
 	    CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0,
 	    "cause tx to abdicate instead of running to completion");
+	ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED;
+	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset",
+	    CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0,
+	    "offset to start using cores at");
+	SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx",
+	    CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0,
+	    "use separate cores for TX and RX");
 
 	/* XXX change for per-queue sizes */
 	SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
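One usage note on the sysctls added above: because iflib_device_register()
later overwrites ifc_sysctl_core_offset with the value returned by
get_ctx_core_offset(), reading the read-only tunable after attach shows the
offset the device actually received, e.g. (hypothetical device name):

# sysctl dev.ix.1.iflib.core_offset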
|
Loading…
Reference in New Issue
Block a user