cred: distribute reference count per thread

This avoids dirtying creds in the common case, see the comment in kern_prot.c
for details.

Reviewed by:	kib
Differential Revision:	https://reviews.freebsd.org/D24007
This commit is contained in:
Mateusz Guzik 2020-06-09 23:03:48 +00:00
parent b4a7ce0690
commit 1724c563e6
6 changed files with 206 additions and 50 deletions

View File

@ -541,7 +541,10 @@ proc0_init(void *dummy __unused)
/* End hack. creds get properly set later with thread_cow_get_proc */
curthread->td_ucred = NULL;
newcred->cr_prison = &prison0;
newcred->cr_users++; /* avoid assertion failure */
proc_set_cred_init(p, newcred);
newcred->cr_users--;
crfree(newcred);
#ifdef AUDIT
audit_cred_kproc0(newcred);
#endif
@ -810,8 +813,9 @@ create_init(const void *udata __unused)
#endif
proc_set_cred(initproc, newcred);
td = FIRST_THREAD_IN_PROC(initproc);
crfree(td->td_ucred);
td->td_ucred = crhold(initproc->p_ucred);
crcowfree(td);
td->td_realucred = crcowget(initproc->p_ucred);
td->td_ucred = td->td_realucred;
PROC_UNLOCK(initproc);
sx_xunlock(&proctree_lock);
crfree(oldcred);

View File

@ -961,7 +961,7 @@ fork1(struct thread *td, struct fork_req *fr)
* XXX: This is ugly; when we copy resource usage, we need to bump
* per-cred resource counters.
*/
proc_set_cred_init(newproc, crhold(td->td_ucred));
proc_set_cred_init(newproc, td->td_ucred);
/*
* Initialize resource accounting for the child process.
@ -998,8 +998,7 @@ fork1(struct thread *td, struct fork_req *fr)
#endif
racct_proc_exit(newproc);
fail1:
crfree(newproc->p_ucred);
newproc->p_ucred = NULL;
proc_unset_cred(newproc);
fail2:
if (vm2 != NULL)
vmspace_free(vm2);

View File

@ -1840,6 +1840,98 @@ p_canwait(struct thread *td, struct proc *p)
return (0);
}
/*
* Credential management.
*
* struct ucred objects are rarely allocated but gain and lose references all
* the time (e.g., on struct file alloc/dealloc) turning refcount updates into
* a significant source of cache-line ping ponging. Common cases are worked
* around by modifying thread-local counter instead if the cred to operate on
* matches td_realucred.
*
* The counter is split into 2 parts:
* - cr_users -- total count of all struct proc and struct thread objects
* which have given cred in p_ucred and td_ucred respectively
* - cr_ref -- the actual ref count, only valid if cr_users == 0
*
* If users == 0 then cr_ref behaves similarly to refcount(9), in particular if
* the count reaches 0 the object is freeable.
* If users > 0 and curthread->td_realucred == cred, then updates are performed
* against td_ucredref.
* In other cases updates are performed against cr_ref.
*
* Changing td_realucred into something else decrements cr_users and transfers
* accumulated updates.
*/
struct ucred *
crcowget(struct ucred *cr)
{
mtx_lock(&cr->cr_mtx);
KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
__func__, cr->cr_users, cr));
cr->cr_users++;
cr->cr_ref++;
mtx_unlock(&cr->cr_mtx);
return (cr);
}
static struct ucred *
crunuse(struct thread *td)
{
struct ucred *cr, *crold;
cr = td->td_ucred;
mtx_lock(&cr->cr_mtx);
cr->cr_ref += td->td_ucredref;
td->td_ucredref = 0;
KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
__func__, cr->cr_users, cr));
cr->cr_users--;
if (cr->cr_users == 0) {
KASSERT(cr->cr_ref > 0, ("%s: ref %d not > 0 on cred %p",
__func__, cr->cr_ref, cr));
crold = cr;
} else {
cr->cr_ref--;
crold = NULL;
}
mtx_unlock(&cr->cr_mtx);
return (crold);
}
void
crcowfree(struct thread *td)
{
struct ucred *cr;
cr = crunuse(td);
if (cr != NULL)
crfree(cr);
}
struct ucred *
crcowsync(void)
{
struct thread *td;
struct proc *p;
struct ucred *crnew, *crold;
td = curthread;
p = td->td_proc;
PROC_LOCK_ASSERT(p, MA_OWNED);
MPASS(td->td_realucred == td->td_ucred);
if (td->td_realucred == p->p_ucred)
return (NULL);
crnew = crcowget(p->p_ucred);
crold = crunuse(td);
td->td_realucred = crnew;
td->td_ucred = td->td_realucred;
return (crold);
}
/*
* Allocate a zeroed cred structure.
*/
@ -1849,7 +1941,8 @@ crget(void)
struct ucred *cr;
cr = malloc(sizeof(*cr), M_CRED, M_WAITOK | M_ZERO);
refcount_init(&cr->cr_ref, 1);
mtx_init(&cr->cr_mtx, "cred", NULL, MTX_DEF);
cr->cr_ref = 1;
#ifdef AUDIT
audit_cred_init(cr);
#endif
@ -1868,8 +1961,18 @@ crget(void)
struct ucred *
crhold(struct ucred *cr)
{
struct thread *td;
refcount_acquire(&cr->cr_ref);
td = curthread;
if (__predict_true(td->td_realucred == cr)) {
KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
__func__, cr->cr_users, cr));
td->td_ucredref++;
return (cr);
}
mtx_lock(&cr->cr_mtx);
cr->cr_ref++;
mtx_unlock(&cr->cr_mtx);
return (cr);
}
@ -1879,36 +1982,51 @@ crhold(struct ucred *cr)
void
crfree(struct ucred *cr)
{
struct thread *td;
KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref));
KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred"));
if (refcount_release(&cr->cr_ref)) {
/*
* Some callers of crget(), such as nfs_statfs(),
* allocate a temporary credential, but don't
* allocate a uidinfo structure.
*/
if (cr->cr_uidinfo != NULL)
uifree(cr->cr_uidinfo);
if (cr->cr_ruidinfo != NULL)
uifree(cr->cr_ruidinfo);
/*
* Free a prison, if any.
*/
if (cr->cr_prison != NULL)
prison_free(cr->cr_prison);
if (cr->cr_loginclass != NULL)
loginclass_free(cr->cr_loginclass);
td = curthread;
if (td->td_realucred == cr) {
KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
__func__, cr->cr_users, cr));
td->td_ucredref--;
return;
}
mtx_lock(&cr->cr_mtx);
KASSERT(cr->cr_users >= 0, ("%s: users %d not >= 0 on cred %p",
__func__, cr->cr_users, cr));
cr->cr_ref--;
if (cr->cr_users > 0) {
mtx_unlock(&cr->cr_mtx);
return;
}
KASSERT(cr->cr_ref >= 0, ("%s: ref %d not >= 0 on cred %p",
__func__, cr->cr_ref, cr));
if (cr->cr_ref > 0) {
mtx_unlock(&cr->cr_mtx);
return;
}
/*
* Some callers of crget(), such as nfs_statfs(), allocate a temporary
* credential, but don't allocate a uidinfo structure.
*/
if (cr->cr_uidinfo != NULL)
uifree(cr->cr_uidinfo);
if (cr->cr_ruidinfo != NULL)
uifree(cr->cr_ruidinfo);
if (cr->cr_prison != NULL)
prison_free(cr->cr_prison);
if (cr->cr_loginclass != NULL)
loginclass_free(cr->cr_loginclass);
#ifdef AUDIT
audit_cred_destroy(cr);
audit_cred_destroy(cr);
#endif
#ifdef MAC
mac_cred_destroy(cr);
mac_cred_destroy(cr);
#endif
if (cr->cr_groups != cr->cr_smallgroups)
free(cr->cr_groups, M_CRED);
free(cr, M_CRED);
}
mtx_destroy(&cr->cr_mtx);
if (cr->cr_groups != cr->cr_smallgroups)
free(cr->cr_groups, M_CRED);
free(cr, M_CRED);
}
/*
@ -1982,7 +2100,7 @@ void
proc_set_cred_init(struct proc *p, struct ucred *newcred)
{
p->p_ucred = newcred;
p->p_ucred = crcowget(newcred);
}
/*
@ -1998,10 +2116,20 @@ proc_set_cred_init(struct proc *p, struct ucred *newcred)
void
proc_set_cred(struct proc *p, struct ucred *newcred)
{
struct ucred *cr;
MPASS(p->p_ucred != NULL);
cr = p->p_ucred;
MPASS(cr != NULL);
PROC_LOCK_ASSERT(p, MA_OWNED);
KASSERT(newcred->cr_users == 0, ("%s: users %d not 0 on cred %p",
__func__, newcred->cr_users, newcred));
mtx_lock(&cr->cr_mtx);
KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
__func__, cr->cr_users, cr));
cr->cr_users--;
mtx_unlock(&cr->cr_mtx);
p->p_ucred = newcred;
newcred->cr_users = 1;
PROC_UPDATE_COW(p);
}
@ -2010,9 +2138,17 @@ proc_unset_cred(struct proc *p)
{
struct ucred *cr;
MPASS(p->p_state == PRS_ZOMBIE);
MPASS(p->p_state == PRS_ZOMBIE || p->p_state == PRS_NEW);
cr = p->p_ucred;
p->p_ucred = NULL;
KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
__func__, cr->cr_users, cr));
mtx_lock(&cr->cr_mtx);
cr->cr_users--;
if (cr->cr_users == 0)
KASSERT(cr->cr_ref > 0, ("%s: ref %d not > 0 on cred %p",
__func__, cr->cr_ref, cr));
mtx_unlock(&cr->cr_mtx);
crfree(cr);
}

View File

@ -82,9 +82,9 @@ _Static_assert(offsetof(struct thread, td_flags) == 0xfc,
"struct thread KBI td_flags");
_Static_assert(offsetof(struct thread, td_pflags) == 0x104,
"struct thread KBI td_pflags");
_Static_assert(offsetof(struct thread, td_frame) == 0x498,
_Static_assert(offsetof(struct thread, td_frame) == 0x4a8,
"struct thread KBI td_frame");
_Static_assert(offsetof(struct thread, td_emuldata) == 0x6a0,
_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
"struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0xb0,
"struct proc KBI p_flag");
@ -102,9 +102,9 @@ _Static_assert(offsetof(struct thread, td_flags) == 0x98,
"struct thread KBI td_flags");
_Static_assert(offsetof(struct thread, td_pflags) == 0xa0,
"struct thread KBI td_pflags");
_Static_assert(offsetof(struct thread, td_frame) == 0x2fc,
_Static_assert(offsetof(struct thread, td_frame) == 0x304,
"struct thread KBI td_frame");
_Static_assert(offsetof(struct thread, td_emuldata) == 0x340,
_Static_assert(offsetof(struct thread, td_emuldata) == 0x348,
"struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0x68,
"struct proc KBI p_flag");
@ -463,7 +463,8 @@ thread_cow_get_proc(struct thread *newtd, struct proc *p)
{
PROC_LOCK_ASSERT(p, MA_OWNED);
newtd->td_ucred = crhold(p->p_ucred);
newtd->td_realucred = crcowget(p->p_ucred);
newtd->td_ucred = newtd->td_realucred;
newtd->td_limit = lim_hold(p->p_limit);
newtd->td_cowgen = p->p_cowgen;
}
@ -472,7 +473,9 @@ void
thread_cow_get(struct thread *newtd, struct thread *td)
{
newtd->td_ucred = crhold(td->td_ucred);
MPASS(td->td_realucred == td->td_ucred);
newtd->td_realucred = crcowget(td->td_realucred);
newtd->td_ucred = newtd->td_realucred;
newtd->td_limit = lim_hold(td->td_limit);
newtd->td_cowgen = td->td_cowgen;
}
@ -481,8 +484,8 @@ void
thread_cow_free(struct thread *td)
{
if (td->td_ucred != NULL)
crfree(td->td_ucred);
if (td->td_realucred != NULL)
crcowfree(td);
if (td->td_limit != NULL)
lim_free(td->td_limit);
}
@ -495,13 +498,9 @@ thread_cow_update(struct thread *td)
struct plimit *oldlimit;
p = td->td_proc;
oldcred = NULL;
oldlimit = NULL;
PROC_LOCK(p);
if (td->td_ucred != p->p_ucred) {
oldcred = td->td_ucred;
td->td_ucred = crhold(p->p_ucred);
}
oldcred = crcowsync();
if (td->td_limit != p->p_limit) {
oldlimit = td->td_limit;
td->td_limit = lim_hold(p->p_limit);

View File

@ -267,7 +267,8 @@ struct thread {
struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */
int td_intr_nesting_level; /* (k) Interrupt recursion. */
int td_pinned; /* (k) Temporary cpu pin count. */
struct ucred *td_ucred; /* (k) Reference to credentials. */
struct ucred *td_realucred; /* (k) Reference to credentials. */
struct ucred *td_ucred; /* (k) Used credentials, temporarily switchable. */
struct plimit *td_limit; /* (k) Resource limits. */
int td_slptick; /* (t) Time at sleep. */
int td_blktick; /* (t) Time spent blocked. */
@ -305,6 +306,7 @@ struct thread {
int td_errno; /* (k) Error from last syscall. */
size_t td_vslock_sz; /* (k) amount of vslock-ed space */
struct kcov_info *td_kcov_info; /* (*) Kernel code coverage data */
u_int td_ucredref; /* (k) references on td_realucred */
#define td_endzero td_sigmask
/* Copied during fork1() or create_thread(). */

View File

@ -35,6 +35,10 @@
#ifndef _SYS_UCRED_H_
#define _SYS_UCRED_H_
#if defined(_KERNEL) || defined(_WANT_UCRED)
#include <sys/_lock.h>
#include <sys/_mutex.h>
#endif
#include <bsm/audit.h>
struct loginclass;
@ -46,10 +50,19 @@ struct loginclass;
*
* Please do not inspect cr_uid directly to determine superuserness. The
* priv(9) interface should be used to check for privilege.
*
* Lock reference:
* c - cr_mtx
*
* Unmarked fields are constant after creation.
*
* See "Credential management" comment in kern_prot.c for more information.
*/
#if defined(_KERNEL) || defined(_WANT_UCRED)
struct ucred {
u_int cr_ref; /* reference count */
struct mtx cr_mtx;
u_int cr_ref; /* (c) reference count */
u_int cr_users; /* (c) proc + thread using this cred */
#define cr_startcopy cr_uid
uid_t cr_uid; /* effective user id */
uid_t cr_ruid; /* real user id */
@ -115,8 +128,11 @@ void proc_set_cred_init(struct proc *p, struct ucred *cr);
void proc_set_cred(struct proc *p, struct ucred *cr);
void proc_unset_cred(struct proc *p);
void crfree(struct ucred *cr);
struct ucred *crcowsync(void);
struct ucred *crget(void);
struct ucred *crhold(struct ucred *cr);
struct ucred *crcowget(struct ucred *cr);
void crcowfree(struct thread *td);
void cru2x(struct ucred *cr, struct xucred *xcr);
void cru2xt(struct thread *td, struct xucred *xcr);
void crsetgroups(struct ucred *cr, int n, gid_t *groups);