ktls: Defer creation of threads and zones until first use.
Run ktls_init() when the first KTLS session is created rather than unconditionally during boot. This avoids creating unused threads and allocating unused resources on systems which do not use KTLS.

Reviewed by: gallatin, markj
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D32487
This commit is contained in:
parent
86929782cf
commit
a72ee35564
@ -109,6 +109,9 @@ static struct proc *ktls_proc;
|
||||
static uma_zone_t ktls_session_zone;
|
||||
static uma_zone_t ktls_buffer_zone;
|
||||
static uint16_t ktls_cpuid_lookup[MAXCPU];
|
||||
static int ktls_init_state;
|
||||
static struct sx ktls_init_lock;
|
||||
SX_SYSINIT(ktls_init_lock, &ktls_init_lock, "ktls init");
|
||||
|
||||
SYSCTL_NODE(_kern_ipc, OID_AUTO, tls, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
|
||||
"Kernel TLS offload");
|
||||
@ -379,12 +382,11 @@ ktls_free_mext_contig(struct mbuf *m)
|
||||
uma_zfree(ktls_buffer_zone, (void *)PHYS_TO_DMAP(m->m_epg_pa[0]));
|
||||
}
|
||||
|
||||
static void
|
||||
ktls_init(void *dummy __unused)
|
||||
static int
|
||||
ktls_init(void)
|
||||
{
|
||||
struct thread *td;
|
||||
struct pcpu *pc;
|
||||
cpuset_t mask;
|
||||
int count, domain, error, i;
|
||||
|
||||
ktls_wq = malloc(sizeof(*ktls_wq) * (mp_maxid + 1), M_KTLS,
|
||||
@ -410,36 +412,40 @@ ktls_init(void *dummy __unused)
|
||||
STAILQ_INIT(&ktls_wq[i].m_head);
|
||||
STAILQ_INIT(&ktls_wq[i].so_head);
|
||||
mtx_init(&ktls_wq[i].mtx, "ktls work queue", NULL, MTX_DEF);
|
||||
error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i],
|
||||
&ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i);
|
||||
if (error)
|
||||
panic("Can't add KTLS thread %d error %d", i, error);
|
||||
|
||||
/*
|
||||
* Bind threads to cores. If ktls_bind_threads is >
|
||||
* 1, then we bind to the NUMA domain.
|
||||
*/
|
||||
if (ktls_bind_threads) {
|
||||
if (ktls_bind_threads > 1) {
|
||||
pc = pcpu_find(i);
|
||||
domain = pc->pc_domain;
|
||||
CPU_COPY(&cpuset_domain[domain], &mask);
|
||||
count = ktls_domains[domain].count;
|
||||
ktls_domains[domain].cpu[count] = i;
|
||||
ktls_domains[domain].count++;
|
||||
} else {
|
||||
CPU_SETOF(i, &mask);
|
||||
}
|
||||
error = cpuset_setthread(td->td_tid, &mask);
|
||||
if (error)
|
||||
panic(
|
||||
"Unable to bind KTLS thread for CPU %d error %d",
|
||||
i, error);
|
||||
if (ktls_bind_threads > 1) {
|
||||
pc = pcpu_find(i);
|
||||
domain = pc->pc_domain;
|
||||
count = ktls_domains[domain].count;
|
||||
ktls_domains[domain].cpu[count] = i;
|
||||
ktls_domains[domain].count++;
|
||||
}
|
||||
ktls_cpuid_lookup[ktls_number_threads] = i;
|
||||
ktls_number_threads++;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we somehow have an empty domain, fall back to choosing
|
||||
* among all KTLS threads.
|
||||
*/
|
||||
if (ktls_bind_threads > 1) {
|
||||
for (i = 0; i < vm_ndomains; i++) {
|
||||
if (ktls_domains[i].count == 0) {
|
||||
ktls_bind_threads = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Start kthreads for each workqueue. */
|
||||
CPU_FOREACH(i) {
|
||||
error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i],
|
||||
&ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i);
|
||||
if (error) {
|
||||
printf("Can't add KTLS thread %d error %d\n", i, error);
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Start an allocation thread per-domain to perform blocking allocations
|
||||
* of 16k physically contiguous TLS crypto destination buffers.
|
||||
@ -454,35 +460,46 @@ ktls_init(void *dummy __unused)
|
||||
&ktls_domains[domain], &ktls_proc,
|
||||
&ktls_domains[domain].alloc_td.td,
|
||||
0, 0, "KTLS", "alloc_%d", domain);
|
||||
if (error)
|
||||
panic("Can't add KTLS alloc thread %d error %d",
|
||||
if (error) {
|
||||
printf("Can't add KTLS alloc thread %d error %d\n",
|
||||
domain, error);
|
||||
CPU_COPY(&cpuset_domain[domain], &mask);
|
||||
error = cpuset_setthread(ktls_domains[domain].alloc_td.td->td_tid,
|
||||
&mask);
|
||||
if (error)
|
||||
panic("Unable to bind KTLS alloc %d error %d",
|
||||
domain, error);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we somehow have an empty domain, fall back to choosing
|
||||
* among all KTLS threads.
|
||||
*/
|
||||
if (ktls_bind_threads > 1) {
|
||||
for (i = 0; i < vm_ndomains; i++) {
|
||||
if (ktls_domains[i].count == 0) {
|
||||
ktls_bind_threads = 1;
|
||||
break;
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bootverbose)
|
||||
printf("KTLS: Initialized %d threads\n", ktls_number_threads);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
ktls_start_kthreads(void)
|
||||
{
|
||||
int error, state;
|
||||
|
||||
start:
|
||||
state = atomic_load_acq_int(&ktls_init_state);
|
||||
if (__predict_true(state > 0))
|
||||
return (0);
|
||||
if (state < 0)
|
||||
return (ENXIO);
|
||||
|
||||
sx_xlock(&ktls_init_lock);
|
||||
if (ktls_init_state != 0) {
|
||||
sx_xunlock(&ktls_init_lock);
|
||||
goto start;
|
||||
}
|
||||
|
||||
error = ktls_init();
|
||||
if (error == 0)
|
||||
state = 1;
|
||||
else
|
||||
state = -1;
|
||||
atomic_store_rel_int(&ktls_init_state, state);
|
||||
sx_xunlock(&ktls_init_lock);
|
||||
return (error);
|
||||
}
|
||||
SYSINIT(ktls, SI_SUB_SMP + 1, SI_ORDER_ANY, ktls_init, NULL);
|
||||
|
||||
#if defined(INET) || defined(INET6)
|
||||
static int
|
||||
@ -583,6 +600,10 @@ ktls_create_session(struct socket *so, struct tls_enable *en,
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
error = ktls_start_kthreads();
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
tls = uma_zalloc(ktls_session_zone, M_WAITOK | M_ZERO);
|
||||
|
||||
counter_u64_add(ktls_offload_active, 1);
|
||||
@ -2457,6 +2478,18 @@ ktls_encrypt_async(struct ktls_wq *wq, struct mbuf *top)
|
||||
CURVNET_RESTORE();
|
||||
}
|
||||
|
||||
static int
|
||||
ktls_bind_domain(int domain)
|
||||
{
|
||||
int error;
|
||||
|
||||
error = cpuset_setthread(curthread->td_tid, &cpuset_domain[domain]);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
curthread->td_domain.dr_policy = DOMAINSET_PREF(domain);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
ktls_alloc_thread(void *ctx)
|
||||
{
|
||||
@ -2465,14 +2498,16 @@ ktls_alloc_thread(void *ctx)
|
||||
void **buf;
|
||||
struct sysctl_oid *oid;
|
||||
char name[80];
|
||||
int i, nbufs;
|
||||
int domain, error, i, nbufs;
|
||||
|
||||
curthread->td_domain.dr_policy =
|
||||
DOMAINSET_PREF(PCPU_GET(domain));
|
||||
snprintf(name, sizeof(name), "domain%d", PCPU_GET(domain));
|
||||
domain = ktls_domain - ktls_domains;
|
||||
if (bootverbose)
|
||||
printf("Starting KTLS alloc thread for domain %d\n",
|
||||
PCPU_GET(domain));
|
||||
printf("Starting KTLS alloc thread for domain %d\n", domain);
|
||||
error = ktls_bind_domain(domain);
|
||||
if (error)
|
||||
printf("Unable to bind KTLS alloc thread for domain %d: error %d\n",
|
||||
domain, error);
|
||||
snprintf(name, sizeof(name), "domain%d", domain);
|
||||
oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_ipc_tls), OID_AUTO,
|
||||
name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
|
||||
SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "allocs",
|
||||
@ -2527,10 +2562,32 @@ ktls_work_thread(void *ctx)
|
||||
struct socket *so, *son;
|
||||
STAILQ_HEAD(, mbuf) local_m_head;
|
||||
STAILQ_HEAD(, socket) local_so_head;
|
||||
int cpu;
|
||||
|
||||
if (ktls_bind_threads > 1) {
|
||||
curthread->td_domain.dr_policy =
|
||||
DOMAINSET_PREF(PCPU_GET(domain));
|
||||
cpu = wq - ktls_wq;
|
||||
if (bootverbose)
|
||||
printf("Starting KTLS worker thread for CPU %d\n", cpu);
|
||||
|
||||
/*
|
||||
* Bind to a core. If ktls_bind_threads is > 1, then
|
||||
* we bind to the NUMA domain instead.
|
||||
*/
|
||||
if (ktls_bind_threads) {
|
||||
int error;
|
||||
|
||||
if (ktls_bind_threads > 1) {
|
||||
struct pcpu *pc = pcpu_find(cpu);
|
||||
|
||||
error = ktls_bind_domain(pc->pc_domain);
|
||||
} else {
|
||||
cpuset_t mask;
|
||||
|
||||
CPU_SETOF(cpu, &mask);
|
||||
error = cpuset_setthread(curthread->td_tid, &mask);
|
||||
}
|
||||
if (error)
|
||||
printf("Unable to bind KTLS worker thread for CPU %d: error %d\n",
|
||||
cpu, error);
|
||||
}
|
||||
#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
|
||||
fpu_kern_thread(0);
|
||||
|
Loading…
x
Reference in New Issue
Block a user