From 0b75ac779606d156ee1dddd5fd81b66799027469 Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Wed, 18 Apr 2018 15:34:18 +0000 Subject: [PATCH] iflib: Fix queue distribution when there are no threads Previously, if there are no threads, all queues which targeted cores that share an L2 cache were bound to a single core. The intent is to distribute them across these cores. Reported by: olivier Reviewed by: sbruno Sponsored by: Limelight Networks Differential Revision: https://reviews.freebsd.org/D15120 --- sys/net/iflib.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 844d45dfa325..e7ba5f718383 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -5117,13 +5117,18 @@ find_child_with_core(int cpu, struct cpu_group *grp) } /* - * Find the nth thread on the specified core + * Find the nth "close" core to the specified core + * "close" is defined as the deepest level that shares + * at least an L2 cache. With threads, this will be + * threads on the same core. If the sahred cache is L3 + * or higher, simply returns the same core. */ static int -find_thread(int cpu, int thread_num) +find_close_core(int cpu, int core_offset) { struct cpu_group *grp; int i; + int fcpu; cpuset_t cs; grp = cpu_top; @@ -5143,7 +5148,19 @@ find_thread(int cpu, int thread_num) /* Now pick one */ CPU_COPY(&grp->cg_mask, &cs); - for (i = thread_num % grp->cg_count; i > 0; i--) { + + /* Add the selected CPU offset to core offset. */ + for (i = 0; (fcpu = CPU_FFS(&cs)) != 0; i++) { + if (fcpu - 1 == cpu) + break; + CPU_CLR(fcpu - 1, &cs); + } + MPASS(fcpu); + + core_offset += i; + + CPU_COPY(&grp->cg_mask, &cs); + for (i = core_offset % grp->cg_count; i > 0; i--) { MPASS(CPU_FFS(&cs)); CPU_CLR(CPU_FFS(&cs) - 1, &cs); } @@ -5152,31 +5169,31 @@ find_thread(int cpu, int thread_num) } #else static int -find_thread(int cpu, int thread_num __unused) +find_close_core(int cpu, int core_offset __unused) { return cpu; } #endif static int -get_thread_num(if_ctx_t ctx, iflib_intr_type_t type, int qid) +get_core_offset(if_ctx_t ctx, iflib_intr_type_t type, int qid) { switch (type) { case IFLIB_INTR_TX: - /* TX queues get threads on the same core as the corresponding RX queue */ - /* XXX handle multiple RX threads per core and more than two threads per core */ + /* TX queues get cores which share at least an L2 cache with the corresponding RX queue */ + /* XXX handle multiple RX threads per core and more than two core per L2 group */ return qid / CPU_COUNT(&ctx->ifc_cpus) + 1; case IFLIB_INTR_RX: case IFLIB_INTR_RXTX: - /* RX queues get the first thread on their core */ + /* RX queues get the specified core */ return qid / CPU_COUNT(&ctx->ifc_cpus); default: return -1; } } #else -#define get_thread_num(ctx, type, qid) CPU_FIRST() -#define find_thread(cpuid, tid) CPU_FIRST() +#define get_core_offset(ctx, type, qid) CPU_FIRST() +#define find_close_core(cpuid, tid) CPU_FIRST() #define find_nth(ctx, gid) CPU_FIRST() #endif @@ -5189,9 +5206,9 @@ iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid, int err, tid; cpuid = find_nth(ctx, qid); - tid = get_thread_num(ctx, type, qid); + tid = get_core_offset(ctx, type, qid); MPASS(tid >= 0); - cpuid = find_thread(cpuid, tid); + cpuid = find_close_core(cpuid, tid); err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name); if (err) { device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err);