Support attaching tx queues to cpus

This will attempt to use a different thread/core on the same L2
cache when possible, or use the same cpu as the rx thread when not.
If SMP isn't enabled, don't go looking for cores to use. This is mostly
useful when using shared TX/RX queues.

Reviewed by:	sbruno
Sponsored by:	Limelight Networks
Differential Revision:	https://reviews.freebsd.org/D12446
This commit is contained in:
Stephen Hurd 2017-12-20 01:03:34 +00:00
parent f27d3a8a72
commit b103855e18
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=327013

View File

@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_acpi.h"
#include "opt_sched.h"
#include <sys/param.h>
#include <sys/types.h>
@ -5044,25 +5045,136 @@ iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid,
return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name));
}
#ifdef SMP
/*
 * NOTE(review): this span is a rendered diff with pre- and post-patch lines
 * interleaved (two signatures, duplicated CPU_COPY/CPU_COUNT/CPU_FFS/CPU_CLR
 * calls).  The post-patch version is the one taking (ctx, qid) and operating
 * on a local cpuset_t copy by address — confirm against the repository.
 *
 * find_nth: return the CPU id of the (qid mod CPU_COUNT)'th set bit in the
 * context's CPU set, i.e. map a queue id onto one of the usable CPUs.
 */
static int
find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid)
find_nth(if_ctx_t ctx, int qid)
{
cpuset_t cpus;
int i, cpuid, eqid, count;
CPU_COPY(&ctx->ifc_cpus, cpus);
count = CPU_COUNT(cpus);
/* Work on a private copy so the context's CPU set is not modified. */
CPU_COPY(&ctx->ifc_cpus, &cpus);
count = CPU_COUNT(&cpus);
eqid = qid % count;
/* clear up to the qid'th bit */
for (i = 0; i < eqid; i++) {
cpuid = CPU_FFS(cpus);
cpuid = CPU_FFS(&cpus);
/* CPU_FFS is 1-based; 0 would mean the set ran empty, which can't happen. */
MPASS(cpuid != 0);
CPU_CLR(cpuid-1, cpus);
CPU_CLR(cpuid-1, &cpus);
}
cpuid = CPU_FFS(cpus);
cpuid = CPU_FFS(&cpus);
MPASS(cpuid != 0);
return (cpuid-1);
}
#ifdef SCHED_ULE
extern struct cpu_group *cpu_top; /* CPU topology */

/*
 * Return the index of the child group of grp whose CPU mask contains
 * the given cpu, or -1 if grp is a leaf or no child covers it.
 */
static int
find_child_with_core(int cpu, struct cpu_group *grp)
{
	int idx;

	/* Leaf groups have nothing to descend into. */
	if (grp->cg_children == 0)
		return (-1);

	MPASS(grp->cg_child);
	for (idx = 0; idx < grp->cg_children; idx++)
		if (CPU_ISSET(cpu, &grp->cg_child[idx].cg_mask))
			return (idx);

	return (-1);
}
/*
 * Find the nth thread on the specified core
 */
static int
find_thread(int cpu, int thread_num)
{
	struct cpu_group *cg;
	cpuset_t mask;
	int child, skip;

	cg = cpu_top;
	if (cg == NULL)
		return cpu;

	/* Descend the topology to the smallest group that contains cpu. */
	while ((child = find_child_with_core(cpu, cg)) != -1) {
		/* If the child only has one cpu, don't descend */
		if (cg->cg_child[child].cg_count <= 1)
			break;
		cg = &cg->cg_child[child];
	}

	/* If they don't share at least an L2 cache, use the same CPU */
	if (cg->cg_level > CG_SHARE_L2 || cg->cg_level == CG_SHARE_NONE)
		return cpu;

	/* Pick the (thread_num mod group size)'th CPU of the final group. */
	CPU_COPY(&cg->cg_mask, &mask);
	for (skip = thread_num % cg->cg_count; skip > 0; skip--) {
		MPASS(CPU_FFS(&mask));
		CPU_CLR(CPU_FFS(&mask) - 1, &mask);
	}
	MPASS(CPU_FFS(&mask));
	return CPU_FFS(&mask) - 1;
}
#else
/*
 * Without SCHED_ULE there is no CPU topology to consult, so every
 * "thread" of a core is just the core itself.
 *
 * Fix: the original body was `return cpu_id` — an undeclared identifier
 * and a missing semicolon; the parameter is named `cpu`.
 */
static int
find_thread(int cpu, int thread_num __unused)
{
	return (cpu);
}
#endif
/*
 * Choose which SMT thread of a core a queue of the given type should
 * run on: RX queues take the first thread, TX queues take the next one,
 * anything else yields -1.
 */
static int
get_thread_num(if_ctx_t ctx, iflib_intr_type_t type, int qid)
{
	int ncpu;

	ncpu = CPU_COUNT(&ctx->ifc_cpus);
	switch (type) {
	case IFLIB_INTR_TX:
		/* TX queues get threads on the same core as the corresponding RX queue */
		/* XXX handle multiple RX threads per core and more than two threads per core */
		return (qid / ncpu + 1);
	case IFLIB_INTR_RX:
	case IFLIB_INTR_RXTX:
		/* RX queues get the first thread on their core */
		return (qid / ncpu);
	default:
		return (-1);
	}
}
#else
/*
 * !SMP fallback: no topology to consult, so all queues are pinned to the
 * first (only) CPU.  NOTE(review): the macro parameter is spelled `gid`
 * while callers pass `qid`; harmless since the argument is unused.
 */
#define get_thread_num(ctx, type, qid) CPU_FIRST()
#define find_thread(cpuid, tid) CPU_FIRST()
#define find_nth(ctx, gid) CPU_FIRST()
#endif
/* Just to avoid copy/paste */
/*
 * Map qid onto a core via find_nth(), then onto the appropriate SMT
 * thread of that core for the interrupt type, and attach the grouptask
 * to that CPU.  Returns 0 on success or the taskqgroup_attach_cpu()
 * error after logging it.
 */
static inline int
iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid,
struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, char *name)
{
	int cpuid, tid, err;

	cpuid = find_nth(ctx, qid);
	tid = get_thread_num(ctx, type, qid);
	MPASS(tid >= 0);
	cpuid = find_thread(cpuid, tid);

	err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name);
	if (err != 0) {
		device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err);
		return (err);
	}
#ifdef notyet
	if (cpuid > ctx->ifc_cpuid_highest)
		ctx->ifc_cpuid_highest = cpuid;
#endif
	MPASS(gtask->gt_taskqueue != NULL);
	return (0);
}
int
iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
iflib_intr_type_t type, driver_filter_t *filter,
@ -5071,9 +5183,8 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
struct grouptask *gtask;
struct taskqgroup *tqg;
iflib_filter_info_t info;
cpuset_t cpus;
gtask_fn_t *fn;
int tqrid, err, cpuid;
int tqrid, err;
driver_filter_t *intr_fast;
void *q;
@ -5136,8 +5247,9 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
return (0);
if (tqrid != -1) {
cpuid = find_nth(ctx, &cpus, qid);
taskqgroup_attach_cpu(tqg, gtask, q, cpuid, rman_get_start(irq->ii_res), name);
err = iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name);
if (err)
return (err);
} else {
taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name);
}
@ -5153,6 +5265,7 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type,
gtask_fn_t *fn;
void *q;
int irq_num = -1;
int err;
switch (type) {
case IFLIB_INTR_TX:
@ -5181,7 +5294,14 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type,
panic("unknown net intr type");
}
GROUPTASK_INIT(gtask, 0, fn, q);
taskqgroup_attach(tqg, gtask, q, irq_num, name);
if (irq_num != -1) {
err = iflib_irq_set_affinity(ctx, irq_num, type, qid, gtask, tqg, q, name);
if (err)
taskqgroup_attach(tqg, gtask, q, irq_num, name);
}
else {
taskqgroup_attach(tqg, gtask, q, irq_num, name);
}
}
void