vfs_export: Add mnt_exjail to control exports done in prisons

If there are multiple instances of mountd(8) (in different
prisons), there will be confusion if they manipulate the
exports of the same file system.  This patch adds mnt_exjail
to "struct mount" so that the credentials (and, therefore,
the prison) that did the exports for that file system can
be recorded.  If another prison has already exported the
file system, vfs_export() will fail with an error.
If mnt_exjail == NULL, the file system has not been exported.
mnt_exjail is checked by the NFS server, so that exports done
from within a different prison will not be used.

The patch also implements vfs_exjail_delete(), which is
called from prison_cleanup() to release all the mnt_exjail
credential references, so that the prison can be removed.
Mainly to avoid doing a scan of the mountlist for the case
where there were no exports done from within the prison,
a count of how many file systems have been exported from
within the prison is kept in pr_exportcnt.

Reviewed by:	markj
Discussed with:	jamie
Differential Revision:	https://reviews.freebsd.org/D38371
MFC after:	3 months
This commit is contained in:
Rick Macklem 2023-02-21 13:00:42 -08:00
parent 6b802933f1
commit 88175af8b7
6 changed files with 194 additions and 14 deletions

View File

@ -3259,8 +3259,16 @@ nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
{
int error;
error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
&exp->nes_numsecflavor, exp->nes_secflavors);
error = 0;
*credp = NULL;
MNT_ILOCK(mp);
if (mp->mnt_exjail == NULL ||
mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison)
error = EACCES;
MNT_IUNLOCK(mp);
if (error == 0)
error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
&exp->nes_numsecflavor, exp->nes_secflavors);
if (error) {
if (NFSD_VNET(nfs_rootfhset)) {
exp->nes_exflag = 0;
@ -3294,8 +3302,14 @@ nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
/* Make sure the server replies ESTALE to the client. */
error = ESTALE;
if (nam && !error) {
error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
&exp->nes_numsecflavor, exp->nes_secflavors);
MNT_ILOCK(mp);
if (mp->mnt_exjail == NULL ||
mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison)
error = EACCES;
MNT_IUNLOCK(mp);
if (error == 0)
error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
&exp->nes_numsecflavor, exp->nes_secflavors);
if (error) {
if (NFSD_VNET(nfs_rootfhset)) {
exp->nes_exflag = 0;
@ -3465,7 +3479,7 @@ nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
struct nameidata nd;
fhandle_t fh;
error = vfs_export(NFSD_VNET(nfsv4root_mnt), &nfsexargp->export);
error = vfs_export(NFSD_VNET(nfsv4root_mnt), &nfsexargp->export, false);
if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
NFSD_VNET(nfs_rootfhset) = 0;
else if (error == 0) {

View File

@ -3332,6 +3332,7 @@ prison_cleanup(struct prison *pr)
{
sx_assert(&allprison_lock, SA_XLOCKED);
mtx_assert(&pr->pr_mtx, MA_NOTOWNED);
vfs_exjail_delete(pr);
shm_remove_prison(pr);
(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
}

View File

@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/refcount.h>
#include <sys/signalvar.h>
@ -296,12 +297,18 @@ vfs_free_addrlist(struct netexport *nep)
* and the passed in netexport.
* Struct export_args *argp is the variable used to twiddle options,
* the structure is described in sys/mount.h
* The do_exjail argument should be true if *mp is in the mountlist
* and false if not. It is not in the mountlist for the NFSv4 rootfs
* fake mount point just used for exports.
*/
int
vfs_export(struct mount *mp, struct export_args *argp)
vfs_export(struct mount *mp, struct export_args *argp, bool do_exjail)
{
struct netexport *nep;
struct ucred *cr;
struct prison *pr;
int error;
bool new_nep;
if ((argp->ex_flags & (MNT_DELEXPORT | MNT_EXPORTED)) == 0)
return (EINVAL);
@ -312,6 +319,7 @@ vfs_export(struct mount *mp, struct export_args *argp)
return (EINVAL);
error = 0;
pr = curthread->td_ucred->cr_prison;
lockmgr(&mp->mnt_explock, LK_EXCLUSIVE, NULL);
nep = mp->mnt_export;
if (argp->ex_flags & MNT_DELEXPORT) {
@ -319,6 +327,21 @@ vfs_export(struct mount *mp, struct export_args *argp)
error = ENOENT;
goto out;
}
MNT_ILOCK(mp);
if (mp->mnt_exjail != NULL && mp->mnt_exjail->cr_prison != pr &&
pr == &prison0) {
MNT_IUNLOCK(mp);
/* EXDEV will not get logged by mountd(8). */
error = EXDEV;
goto out;
} else if (mp->mnt_exjail != NULL &&
mp->mnt_exjail->cr_prison != pr) {
MNT_IUNLOCK(mp);
/* EPERM will get logged by mountd(8). */
error = EPERM;
goto out;
}
MNT_IUNLOCK(mp);
if (mp->mnt_flag & MNT_EXPUBLIC) {
vfs_setpublicfs(NULL, NULL, NULL);
MNT_ILOCK(mp);
@ -330,18 +353,51 @@ vfs_export(struct mount *mp, struct export_args *argp)
free(nep, M_MOUNT);
nep = NULL;
MNT_ILOCK(mp);
cr = mp->mnt_exjail;
mp->mnt_exjail = NULL;
mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
MNT_IUNLOCK(mp);
if (cr != NULL) {
atomic_subtract_int(&pr->pr_exportcnt, 1);
crfree(cr);
}
}
if (argp->ex_flags & MNT_EXPORTED) {
new_nep = false;
MNT_ILOCK(mp);
if (mp->mnt_exjail == NULL) {
MNT_IUNLOCK(mp);
if (do_exjail && nep != NULL) {
vfs_free_addrlist(nep);
memset(nep, 0, sizeof(*nep));
new_nep = true;
}
} else if (mp->mnt_exjail->cr_prison != pr) {
MNT_IUNLOCK(mp);
error = EPERM;
goto out;
} else
MNT_IUNLOCK(mp);
if (nep == NULL) {
nep = malloc(sizeof(struct netexport), M_MOUNT, M_WAITOK | M_ZERO);
nep = malloc(sizeof(struct netexport), M_MOUNT,
M_WAITOK | M_ZERO);
mp->mnt_export = nep;
new_nep = true;
}
if (argp->ex_flags & MNT_EXPUBLIC) {
if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) {
if (new_nep) {
mp->mnt_export = NULL;
free(nep, M_MOUNT);
}
goto out;
}
new_nep = false;
MNT_ILOCK(mp);
if (do_exjail && mp->mnt_exjail == NULL) {
mp->mnt_exjail = crhold(curthread->td_ucred);
atomic_add_int(&pr->pr_exportcnt, 1);
}
mp->mnt_flag |= MNT_EXPUBLIC;
MNT_IUNLOCK(mp);
}
@ -349,9 +405,18 @@ vfs_export(struct mount *mp, struct export_args *argp)
argp->ex_numsecflavors = 1;
argp->ex_secflavors[0] = AUTH_SYS;
}
if ((error = vfs_hang_addrlist(mp, nep, argp)))
if ((error = vfs_hang_addrlist(mp, nep, argp))) {
if (new_nep) {
mp->mnt_export = NULL;
free(nep, M_MOUNT);
}
goto out;
}
MNT_ILOCK(mp);
if (do_exjail && mp->mnt_exjail == NULL) {
mp->mnt_exjail = crhold(curthread->td_ucred);
atomic_add_int(&pr->pr_exportcnt, 1);
}
mp->mnt_flag |= MNT_EXPORTED;
MNT_IUNLOCK(mp);
}
@ -371,6 +436,97 @@ vfs_export(struct mount *mp, struct export_args *argp)
return (error);
}
/*
 * Get rid of credential references for this prison.
 *
 * Walks the mountlist and, for every mount whose mnt_exjail credential
 * belongs to prison "pr", clears mnt_exjail and drops that credential
 * reference with crfree().  When the mount can be busied, its
 * MNT_EXPORTED/MNT_DEFEXPORTED flags are cleared and its export list
 * (mnt_export) is freed as well; otherwise only the credential
 * reference is released and the stale exports are left in place.
 *
 * pr - the prison being cleaned up (caller is prison_cleanup()).
 *
 * Only the local counter "i" is decremented here; pr->pr_exportcnt
 * itself is not modified by this function.
 */
void
vfs_exjail_delete(struct prison *pr)
{
struct mount *mp;
struct ucred *cr;
int error, i;
/*
 * Since this function is called from prison_cleanup() after
 * all processes in the prison have exited, the value of
 * pr_exportcnt can no longer increase. It is possible for
 * a dismount of a file system exported within this prison
 * to be in progress. In this case, the file system is no
 * longer in the mountlist and the mnt_exjail will be free'd
 * by vfs_mount_destroy() at some time. As such, pr_exportcnt
 * and, therefore "i", is the upper bound on the number of
 * mnt_exjail entries to be found by this function.
 */
i = atomic_load_int(&pr->pr_exportcnt);
KASSERT(i >= 0, ("vfs_exjail_delete: pr_exportcnt negative"));
/* Nothing was exported from this prison: skip the mountlist scan. */
if (i == 0)
return;
mtx_lock(&mountlist_mtx);
tryagain:
TAILQ_FOREACH(mp, &mountlist, mnt_list) {
/* mnt_exjail is examined under the mount interlock. */
MNT_ILOCK(mp);
if (mp->mnt_exjail != NULL &&
mp->mnt_exjail->cr_prison == pr) {
MNT_IUNLOCK(mp);
/*
 * NOTE(review): presumably vfs_busy() with MBF_MNTLSTLOCK
 * releases mountlist_mtx on success (hence the mtx_lock()
 * and rescan further down) and leaves it held when it fails
 * with MBF_NOWAIT (hence the plain "continue" below) --
 * confirm against vfs_busy().
 */
error = vfs_busy(mp, MBF_MNTLSTLOCK | MBF_NOWAIT);
if (error != 0) {
/*
 * If the vfs_busy() fails, we still want to
 * get rid of mnt_exjail for two reasons:
 * - a credential reference will result in
 * a prison not being removed
 * - setting mnt_exjail NULL indicates that
 * the exports are no longer valid
 * The now invalid exports will be deleted
 * when the file system is dismounted or
 * the file system is re-exported by mountd.
 */
cr = NULL;
/* Re-check: the interlock was dropped before vfs_busy(). */
MNT_ILOCK(mp);
if (mp->mnt_exjail != NULL &&
mp->mnt_exjail->cr_prison == pr) {
cr = mp->mnt_exjail;
mp->mnt_exjail = NULL;
}
MNT_IUNLOCK(mp);
/* Drop the reference only after releasing the interlock. */
if (cr != NULL) {
crfree(cr);
i--;
}
/* All expected references have been released; stop scanning. */
if (i == 0)
break;
continue;
}
cr = NULL;
/*
 * The mount is busied.  Take mnt_explock exclusively before
 * touching mnt_export, then re-check mnt_exjail under the
 * interlock since it was dropped above.
 */
lockmgr(&mp->mnt_explock, LK_EXCLUSIVE, NULL);
MNT_ILOCK(mp);
if (mp->mnt_exjail != NULL &&
mp->mnt_exjail->cr_prison == pr) {
cr = mp->mnt_exjail;
mp->mnt_exjail = NULL;
mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
MNT_IUNLOCK(mp);
/* Tear down the export list while still holding mnt_explock. */
vfs_free_addrlist(mp->mnt_export);
free(mp->mnt_export, M_MOUNT);
mp->mnt_export = NULL;
} else
MNT_IUNLOCK(mp);
lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
if (cr != NULL) {
crfree(cr);
i--;
}
/* Re-take mountlist_mtx before unbusying and resuming the scan. */
mtx_lock(&mountlist_mtx);
vfs_unbusy(mp);
if (i == 0)
break;
/*
 * Restart from the head of the mountlist rather than
 * continuing from mp (presumably because the list may have
 * changed while mountlist_mtx was not held -- TODO confirm).
 */
goto tryagain;
}
/* Not exported from this prison: release the interlock and move on. */
MNT_IUNLOCK(mp);
}
mtx_unlock(&mountlist_mtx);
}
/*
* Set the publicly exported filesystem (WebNFS). Currently, only
* one public filesystem is possible in the spec (RFC 2054 and 2055)

View File

@ -761,6 +761,11 @@ vfs_mount_destroy(struct mount *mp)
#endif
if (mp->mnt_opt != NULL)
vfs_freeopts(mp->mnt_opt);
if (mp->mnt_exjail != NULL) {
atomic_subtract_int(&mp->mnt_exjail->cr_prison->pr_exportcnt,
1);
crfree(mp->mnt_exjail);
}
if (mp->mnt_export != NULL) {
vfs_free_addrlist(mp->mnt_export);
free(mp->mnt_export, M_MOUNT);
@ -1395,7 +1400,7 @@ vfs_domount_update(
} else
export_error = EINVAL;
if (export_error == 0)
export_error = vfs_export(mp, &export);
export_error = vfs_export(mp, &export, true);
free(export.ex_groups, M_TEMP);
break;
case (sizeof(export)):
@ -1417,7 +1422,7 @@ vfs_domount_update(
else
export_error = EINVAL;
if (export_error == 0)
export_error = vfs_export(mp, &export);
export_error = vfs_export(mp, &export, true);
free(grps, M_TEMP);
break;
default:

View File

@ -196,7 +196,8 @@ struct prison {
int pr_enforce_statfs; /* (p) statfs permission */
int pr_devfs_rsnum; /* (p) devfs ruleset */
enum prison_state pr_state; /* (q) state in life cycle */
int pr_spare[2];
volatile int pr_exportcnt; /* (r) count of mount exports */
int pr_spare;
int pr_osreldate; /* (c) kern.osreldate value */
unsigned long pr_hostid; /* (p) jail hostid */
char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */

View File

@ -216,6 +216,7 @@ struct mount_upper_node {
* i - interlock
* v - vnode freelist mutex
* d - deferred unmount list mutex
* e - mnt_explock
*
* Unmarked fields are considered stable as long as a ref is held.
*
@ -245,13 +246,14 @@ struct mount {
void * mnt_data; /* private data */
time_t mnt_time; /* last time written*/
int mnt_iosize_max; /* max size for clusters, etc */
struct netexport *mnt_export; /* export list */
struct netexport *mnt_export; /* (e) export list */
struct label *mnt_label; /* MAC label for the fs */
u_int mnt_hashseed; /* Random seed for vfs_hash */
int mnt_lockref; /* (i) Lock reference count */
int mnt_secondary_writes; /* (i) # of secondary writes */
int mnt_secondary_accwrites;/* (i) secondary wr. starts */
struct thread *mnt_susp_owner; /* (i) thread owning suspension */
struct ucred *mnt_exjail; /* (i) jail which did exports */
#define mnt_endzero mnt_gjprovider
char *mnt_gjprovider; /* gjournal provider name */
struct mtx mnt_listmtx;
@ -1015,8 +1017,9 @@ int vfs_setpublicfs /* set publicly exported fs */
(struct mount *, struct netexport *, struct export_args *);
void vfs_periodic(struct mount *, int);
int vfs_busy(struct mount *, int);
void vfs_exjail_delete(struct prison *);
int vfs_export /* process mount export info */
(struct mount *, struct export_args *);
(struct mount *, struct export_args *, bool);
void vfs_free_addrlist(struct netexport *);
void vfs_allocate_syncvnode(struct mount *);
void vfs_deallocate_syncvnode(struct mount *);