There is a window between the point where threads are removed from the
process list and the point where the thread destructor is invoked. Catch
that window by waiting for all task_struct allocations to be returned
before freeing the UMA zone in the LinuxKPI. Otherwise, UMA may fail to
release the zone due to concurrent access and panic:

panic() - Bad link element prev->next != elm
zone_release()
bucket_drain()
bucket_free()
zone_dtor()
zone_free_item()
uma_zdestroy()
linux_current_uninit()

This failure can be triggered by loading and unloading the LinuxKPI module
in a loop:

while true
do
kldload linuxkpi
kldunload linuxkpi
done

Discussed with:	kib@
MFC after:	1 week
Sponsored by:	Mellanox Technologies // NVIDIA Networking
This commit is contained in:
Hans Petter Selasky 2021-05-21 13:17:42 +02:00
parent c1fbb54f4b
commit b764a42653

View File

@ -45,6 +45,7 @@ extern u_int first_msi_irq, num_msi_irqs;
static eventhandler_tag linuxkpi_thread_dtor_tag; static eventhandler_tag linuxkpi_thread_dtor_tag;
static atomic_t linux_current_allocs;
static uma_zone_t linux_current_zone; static uma_zone_t linux_current_zone;
static uma_zone_t linux_mm_zone; static uma_zone_t linux_mm_zone;
@ -146,6 +147,10 @@ linux_alloc_current(struct thread *td, int flags)
/* free mm_struct pointer, if any */ /* free mm_struct pointer, if any */
uma_zfree(linux_mm_zone, mm); uma_zfree(linux_mm_zone, mm);
/* keep track of number of allocations */
if (atomic_add_return(1, &linux_current_allocs) == INT_MAX)
panic("linux_alloc_current: Refcount too high!");
return (0); return (0);
} }
@ -173,6 +178,10 @@ linux_free_current(struct task_struct *ts)
{ {
mmput(ts->mm); mmput(ts->mm);
uma_zfree(linux_current_zone, ts); uma_zfree(linux_current_zone, ts);
/* keep track of number of allocations */
if (atomic_sub_return(1, &linux_current_allocs) < 0)
panic("linux_free_current: Negative refcount!");
} }
static void static void
@ -271,10 +280,6 @@ SYSCTL_INT(_compat_linuxkpi, OID_AUTO, task_struct_reserve,
static void static void
linux_current_init(void *arg __unused) linux_current_init(void *arg __unused)
{ {
lkpi_alloc_current = linux_alloc_current;
linuxkpi_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
linuxkpi_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
TUNABLE_INT_FETCH("compat.linuxkpi.task_struct_reserve", TUNABLE_INT_FETCH("compat.linuxkpi.task_struct_reserve",
&lkpi_task_resrv); &lkpi_task_resrv);
if (lkpi_task_resrv == 0) { if (lkpi_task_resrv == 0) {
@ -298,6 +303,12 @@ linux_current_init(void *arg __unused)
UMA_ALIGN_PTR, 0); UMA_ALIGN_PTR, 0);
uma_zone_reserve(linux_mm_zone, lkpi_task_resrv); uma_zone_reserve(linux_mm_zone, lkpi_task_resrv);
uma_prealloc(linux_mm_zone, lkpi_task_resrv); uma_prealloc(linux_mm_zone, lkpi_task_resrv);
atomic_thread_fence_seq_cst();
lkpi_alloc_current = linux_alloc_current;
linuxkpi_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
linuxkpi_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
} }
SYSINIT(linux_current, SI_SUB_EVENTHANDLER, SI_ORDER_SECOND, SYSINIT(linux_current, SI_SUB_EVENTHANDLER, SI_ORDER_SECOND,
linux_current_init, NULL); linux_current_init, NULL);
@ -309,6 +320,10 @@ linux_current_uninit(void *arg __unused)
struct task_struct *ts; struct task_struct *ts;
struct thread *td; struct thread *td;
lkpi_alloc_current = linux_alloc_current_noop;
atomic_thread_fence_seq_cst();
sx_slock(&allproc_lock); sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) { FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p); PROC_LOCK(p);
@ -321,8 +336,18 @@ linux_current_uninit(void *arg __unused)
PROC_UNLOCK(p); PROC_UNLOCK(p);
} }
sx_sunlock(&allproc_lock); sx_sunlock(&allproc_lock);
/*
* There is a window where threads are removed from the
* process list and where the thread destructor is invoked.
* Catch that window by waiting for all task_struct
* allocations to be returned before freeing the UMA zone.
*/
while (atomic_read(&linux_current_allocs) != 0)
pause("W", 1);
EVENTHANDLER_DEREGISTER(thread_dtor, linuxkpi_thread_dtor_tag); EVENTHANDLER_DEREGISTER(thread_dtor, linuxkpi_thread_dtor_tag);
lkpi_alloc_current = linux_alloc_current_noop;
uma_zdestroy(linux_current_zone); uma_zdestroy(linux_current_zone);
uma_zdestroy(linux_mm_zone); uma_zdestroy(linux_mm_zone);
} }