ktls: Defer creation of threads and zones until first use.

Run ktls_init() when the first KTLS session is created rather than
unconditionally during boot.  This avoids creating unused threads and
allocating unused resources on systems which do not use KTLS.

Reviewed by:	gallatin, markj
Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D32487
John Baldwin 2021-10-14 15:48:34 -07:00
parent 86929782cf
commit a72ee35564
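
The heart of the change is a first-use initialization pattern: a lock-free fast path over a tri-state flag (0 = uninitialized, 1 = initialized, -1 = failed), with a lock serializing the single thread that runs the expensive setup. A failed attempt latches the flag at -1, so later sessions fail fast with ENXIO rather than retrying. Below is a minimal userspace sketch of the same pattern; C11 atomics and a pthread mutex stand in for the kernel's atomic_load_acq_int()/atomic_store_rel_int() and the sx(9) lock, and lazy_start()/do_init() are hypothetical names, not part of this commit.

	#include <errno.h>
	#include <pthread.h>
	#include <stdatomic.h>

	/* 0 = not yet initialized, 1 = initialized, -1 = initialization failed. */
	static atomic_int lazy_init_state;
	static pthread_mutex_t lazy_init_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Hypothetical stand-in for ktls_init(): the expensive one-time setup. */
	static int
	do_init(void)
	{
		return (0);
	}

	static int
	lazy_start(void)
	{
		int error, state;

	restart:
		/*
		 * Fast path: the acquire load pairs with the release store
		 * below, so a thread observing state > 0 also observes all
		 * of the initialization that preceded the store.
		 */
		state = atomic_load_explicit(&lazy_init_state, memory_order_acquire);
		if (state > 0)
			return (0);
		if (state < 0)
			return (ENXIO);	/* Earlier attempt failed; stay failed. */

		/* Slow path: let exactly one thread run the setup. */
		pthread_mutex_lock(&lazy_init_lock);
		if (atomic_load_explicit(&lazy_init_state,
		    memory_order_relaxed) != 0) {
			/* Another thread finished (or failed) while we waited. */
			pthread_mutex_unlock(&lazy_init_lock);
			goto restart;
		}
		error = do_init();
		atomic_store_explicit(&lazy_init_state, error == 0 ? 1 : -1,
		    memory_order_release);
		pthread_mutex_unlock(&lazy_init_lock);
		return (error);
	}

The acquire load on the fast path pairs with the release store made after initialization, so a caller that observes state == 1 also observes every store performed by the init routine; ktls_start_kthreads() in the diff below relies on exactly this pairing.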

--- a/sys/kern/uipc_ktls.c
+++ b/sys/kern/uipc_ktls.c

@@ -109,6 +109,9 @@ static struct proc *ktls_proc;
 static uma_zone_t ktls_session_zone;
 static uma_zone_t ktls_buffer_zone;
 static uint16_t ktls_cpuid_lookup[MAXCPU];
+static int ktls_init_state;
+static struct sx ktls_init_lock;
+SX_SYSINIT(ktls_init_lock, &ktls_init_lock, "ktls init");
 
 SYSCTL_NODE(_kern_ipc, OID_AUTO, tls, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Kernel TLS offload");
@@ -379,12 +382,11 @@ ktls_free_mext_contig(struct mbuf *m)
 	uma_zfree(ktls_buffer_zone, (void *)PHYS_TO_DMAP(m->m_epg_pa[0]));
 }
 
-static void
-ktls_init(void *dummy __unused)
+static int
+ktls_init(void)
 {
 	struct thread *td;
 	struct pcpu *pc;
-	cpuset_t mask;
 	int count, domain, error, i;
 
 	ktls_wq = malloc(sizeof(*ktls_wq) * (mp_maxid + 1), M_KTLS,
@@ -410,36 +412,40 @@ ktls_init(void *dummy __unused)
 		STAILQ_INIT(&ktls_wq[i].m_head);
 		STAILQ_INIT(&ktls_wq[i].so_head);
 		mtx_init(&ktls_wq[i].mtx, "ktls work queue", NULL, MTX_DEF);
-		error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i],
-		    &ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i);
-		if (error)
-			panic("Can't add KTLS thread %d error %d", i, error);
-
-		/*
-		 * Bind threads to cores.  If ktls_bind_threads is >
-		 * 1, then we bind to the NUMA domain.
-		 */
-		if (ktls_bind_threads) {
-			if (ktls_bind_threads > 1) {
-				pc = pcpu_find(i);
-				domain = pc->pc_domain;
-				CPU_COPY(&cpuset_domain[domain], &mask);
-				count = ktls_domains[domain].count;
-				ktls_domains[domain].cpu[count] = i;
-				ktls_domains[domain].count++;
-			} else {
-				CPU_SETOF(i, &mask);
-			}
-			error = cpuset_setthread(td->td_tid, &mask);
-			if (error)
-				panic(
-			    "Unable to bind KTLS thread for CPU %d error %d",
-				    i, error);
+		if (ktls_bind_threads > 1) {
+			pc = pcpu_find(i);
+			domain = pc->pc_domain;
+			count = ktls_domains[domain].count;
+			ktls_domains[domain].cpu[count] = i;
+			ktls_domains[domain].count++;
 		}
 		ktls_cpuid_lookup[ktls_number_threads] = i;
 		ktls_number_threads++;
 	}
 
+	/*
+	 * If we somehow have an empty domain, fall back to choosing
+	 * among all KTLS threads.
+	 */
+	if (ktls_bind_threads > 1) {
+		for (i = 0; i < vm_ndomains; i++) {
+			if (ktls_domains[i].count == 0) {
+				ktls_bind_threads = 1;
+				break;
+			}
+		}
+	}
+
+	/* Start kthreads for each workqueue. */
+	CPU_FOREACH(i) {
+		error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i],
+		    &ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i);
+		if (error) {
+			printf("Can't add KTLS thread %d error %d\n", i, error);
+			return (error);
+		}
+	}
+
 	/*
 	 * Start an allocation thread per-domain to perform blocking allocations
 	 * of 16k physically contiguous TLS crypto destination buffers.
@@ -454,35 +460,46 @@ ktls_init(void *dummy __unused)
 			    &ktls_domains[domain], &ktls_proc,
 			    &ktls_domains[domain].alloc_td.td,
 			    0, 0, "KTLS", "alloc_%d", domain);
-			if (error)
-				panic("Can't add KTLS alloc thread %d error %d",
-				    domain, error);
-			CPU_COPY(&cpuset_domain[domain], &mask);
-			error = cpuset_setthread(ktls_domains[domain].alloc_td.td->td_tid,
-			    &mask);
-			if (error)
-				panic("Unable to bind KTLS alloc %d error %d",
-				    domain, error);
+			if (error) {
+				printf("Can't add KTLS alloc thread %d error %d\n",
+				    domain, error);
+				return (error);
+			}
 		}
 	}
 
-	/*
-	 * If we somehow have an empty domain, fall back to choosing
-	 * among all KTLS threads.
-	 */
-	if (ktls_bind_threads > 1) {
-		for (i = 0; i < vm_ndomains; i++) {
-			if (ktls_domains[i].count == 0) {
-				ktls_bind_threads = 1;
-				break;
-			}
-		}
-	}
-
 	if (bootverbose)
 		printf("KTLS: Initialized %d threads\n", ktls_number_threads);
+	return (0);
+}
+
+static int
+ktls_start_kthreads(void)
+{
+	int error, state;
+
+start:
+	state = atomic_load_acq_int(&ktls_init_state);
+	if (__predict_true(state > 0))
+		return (0);
+	if (state < 0)
+		return (ENXIO);
+
+	sx_xlock(&ktls_init_lock);
+	if (ktls_init_state != 0) {
+		sx_xunlock(&ktls_init_lock);
+		goto start;
+	}
+
+	error = ktls_init();
+	if (error == 0)
+		state = 1;
+	else
+		state = -1;
+	atomic_store_rel_int(&ktls_init_state, state);
+	sx_xunlock(&ktls_init_lock);
+	return (error);
 }
-SYSINIT(ktls, SI_SUB_SMP + 1, SI_ORDER_ANY, ktls_init, NULL);
 
 #if defined(INET) || defined(INET6)
 static int
@@ -583,6 +600,10 @@ ktls_create_session(struct socket *so, struct tls_enable *en,
 		return (EINVAL);
 	}
 
+	error = ktls_start_kthreads();
+	if (error != 0)
+		return (error);
+
 	tls = uma_zalloc(ktls_session_zone, M_WAITOK | M_ZERO);
 
 	counter_u64_add(ktls_offload_active, 1);
@@ -2457,6 +2478,18 @@ ktls_encrypt_async(struct ktls_wq *wq, struct mbuf *top)
 	CURVNET_RESTORE();
 }
 
+static int
+ktls_bind_domain(int domain)
+{
+	int error;
+
+	error = cpuset_setthread(curthread->td_tid, &cpuset_domain[domain]);
+	if (error != 0)
+		return (error);
+	curthread->td_domain.dr_policy = DOMAINSET_PREF(domain);
+	return (0);
+}
+
 static void
 ktls_alloc_thread(void *ctx)
 {
@@ -2465,14 +2498,16 @@ ktls_alloc_thread(void *ctx)
 	void **buf;
 	struct sysctl_oid *oid;
 	char name[80];
-	int i, nbufs;
+	int domain, error, i, nbufs;
 
-	curthread->td_domain.dr_policy =
-	    DOMAINSET_PREF(PCPU_GET(domain));
-	snprintf(name, sizeof(name), "domain%d", PCPU_GET(domain));
+	domain = ktls_domain - ktls_domains;
 	if (bootverbose)
-		printf("Starting KTLS alloc thread for domain %d\n",
-		    PCPU_GET(domain));
+		printf("Starting KTLS alloc thread for domain %d\n", domain);
+	error = ktls_bind_domain(domain);
+	if (error)
+		printf("Unable to bind KTLS alloc thread for domain %d: error %d\n",
+		    domain, error);
+	snprintf(name, sizeof(name), "domain%d", domain);
 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_ipc_tls), OID_AUTO,
 	    name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "allocs",
@@ -2527,10 +2562,32 @@ ktls_work_thread(void *ctx)
 	struct socket *so, *son;
 	STAILQ_HEAD(, mbuf) local_m_head;
 	STAILQ_HEAD(, socket) local_so_head;
+	int cpu;
 
-	if (ktls_bind_threads > 1) {
-		curthread->td_domain.dr_policy =
-		    DOMAINSET_PREF(PCPU_GET(domain));
+	cpu = wq - ktls_wq;
+	if (bootverbose)
+		printf("Starting KTLS worker thread for CPU %d\n", cpu);
+
+	/*
+	 * Bind to a core.  If ktls_bind_threads is > 1, then
+	 * we bind to the NUMA domain instead.
+	 */
+	if (ktls_bind_threads) {
+		int error;
+
+		if (ktls_bind_threads > 1) {
+			struct pcpu *pc = pcpu_find(cpu);
+
+			error = ktls_bind_domain(pc->pc_domain);
+		} else {
+			cpuset_t mask;
+
+			CPU_SETOF(cpu, &mask);
+			error = cpuset_setthread(curthread->td_tid, &mask);
+		}
+		if (error)
+			printf("Unable to bind KTLS worker thread for CPU %d: error %d\n",
+			    cpu, error);
 	}
 #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
 	fpu_kern_thread(0);