Maxime Henrion 9d8353732e o Change kernel_vmount() interface to be more convenient : pass two
separate strings instead of passing "foo=bar".
o Don't forget to clear the VMOUNT flag on the vnode when vfs_nmount()
  fails because the fs doesn't implement VFS_NMOUNT (and in vfs_mount()
  when the fs doesn't implement VFS_MOUNT) ; also decrement the vfs
  refcount in the !MNT_UPDATE case.
2002-04-07 13:22:47 +00:00

4843 lines
106 KiB
C

/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
* $FreeBSD$
*/
/* For 4.3 integer FS ID compatibility */
#include "opt_compat.h"
#include "opt_ffs.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/sysent.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/linker.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/dirent.h>
#include <sys/extattr.h>
#include <sys/jail.h>
#include <sys/sysctl.h>
#include <machine/limits.h>
#include <machine/stdarg.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/uma.h>
static int change_dir(struct nameidata *ndp, struct thread *td);
static void checkdirs(struct vnode *olddp, struct vnode *newdp);
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, struct timespec *);
static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
static int setfmode(struct thread *td, struct vnode *, int);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
const struct timespec *, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
struct thread *td);
static void vfs_freeopts(struct vfsoptlist *opt);
static int vfs_nmount(struct thread *td, int, struct uio *);
static int usermount = 0; /* if 1, non-root can mount fs. */
int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
/*
* Virtual File System System Calls
*/
#ifndef _SYS_SYSPROTO_H_
struct nmount_args {
struct iovec *iovp;
unsigned int iovcnt;
int flags;
};
#endif
/* ARGSUSED */
int
nmount(td, uap)
struct thread *td;
struct nmount_args /* {
syscallarg(struct iovec *) iovp;
syscallarg(unsigned int) iovcnt;
syscallarg(int) flags;
} */ *uap;
{
struct uio auio;
struct iovec *iov, *needfree;
struct iovec aiov[UIO_SMALLIOV];
long error, i;
u_int iovlen, iovcnt;
iovcnt = SCARG(uap, iovcnt);
iovlen = iovcnt * sizeof (struct iovec);
/*
* Check that we have an even number of iovec's
* and that we have at least two options.
*/
if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV))
return (EINVAL);
if (iovcnt > UIO_SMALLIOV) {
MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
needfree = iov;
} else {
iov = aiov;
needfree = NULL;
}
auio.uio_iov = iov;
auio.uio_iovcnt = iovcnt;
auio.uio_rw = UIO_WRITE;
auio.uio_segflg = UIO_USERSPACE;
auio.uio_td = td;
auio.uio_offset = 0;
auio.uio_resid = 0;
if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
goto finish;
for (i = 0; i < iovcnt; i++) {
if (iov->iov_len > INT_MAX - auio.uio_resid) {
error = EINVAL;
goto finish;
}
auio.uio_resid += iov->iov_len;
iov++;
}
error = vfs_nmount(td, SCARG(uap, flags), &auio);
finish:
if (needfree != NULL)
free(needfree, M_TEMP);
return (error);
}
/*
* Release all resources related to the
* mount options.
*/
static void
vfs_freeopts(struct vfsoptlist *opt)
{
free(opt->opt, M_MOUNT);
free(opt->optbuf, M_MOUNT);
free(opt, M_MOUNT);
}
int
kernel_mount(iovp, iovcnt, flags)
struct iovec *iovp;
unsigned int iovcnt;
int flags;
{
struct uio auio;
struct iovec *iov;
int error, i;
/*
* Check that we have an even number of iovec's
* and that we have at least two options.
*/
if ((iovcnt & 1) || (iovcnt < 4))
return (EINVAL);
auio.uio_iov = iovp;
auio.uio_iovcnt = iovcnt;
auio.uio_rw = UIO_WRITE;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_offset = 0;
auio.uio_td = NULL;
auio.uio_resid = 0;
iov = iovp;
for (i = 0; i < iovcnt; i++) {
if (iov->iov_len > INT_MAX - auio.uio_resid) {
return (EINVAL);
}
auio.uio_resid += iov->iov_len;
iov++;
}
error = vfs_nmount(curthread, flags, &auio);
return (error);
}
int
kernel_vmount(int flags, ...)
{
struct iovec *iovp;
struct uio auio;
va_list ap;
unsigned int iovcnt, iovlen, len;
const char *cp;
char *buf, *pos;
int error, i, n;
len = 0;
va_start(ap, flags);
for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++)
len += strlen(cp) + 1;
va_end(ap);
if (iovcnt < 4 || iovcnt & 1)
return (EINVAL);
iovlen = iovcnt * sizeof (struct iovec);
MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
pos = buf;
va_start(ap, flags);
for (i = 0; i < iovcnt; i++) {
cp = va_arg(ap, const char *);
copystr(cp, pos, len - (pos - buf), &n);
iovp[i].iov_base = pos;
iovp[i].iov_len = n;
pos += n;
}
va_end(ap);
auio.uio_iov = iovp;
auio.uio_iovcnt = iovcnt;
auio.uio_rw = UIO_WRITE;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_offset = 0;
auio.uio_td = NULL;
auio.uio_resid = len;
error = vfs_nmount(curthread, flags, &auio);
FREE(iovp, M_MOUNT);
FREE(buf, M_MOUNT);
return (error);
}
/*
* vfs_nmount(): actually attempt a filesystem mount.
*/
static int
vfs_nmount(td, fsflags, fsoptions)
struct thread *td;
int fsflags; /* Flags common to all filesystems. */
struct uio *fsoptions; /* Options local to the filesystem. */
{
linker_file_t lf;
struct vnode *vp;
struct mount *mp;
struct vfsconf *vfsp;
struct iovec *cur;
struct vfsoptlist *optlist;
struct vfsopt *opt;
char *buf, *fstype, *fspath;
int error, flag = 0, kern_flag = 0, i, len, optcnt;
int offset, iovcnt, fstypelen, fspathlen;
struct vattr va;
struct nameidata nd;
/*
* Allocate memory to hold the vfsopt structures.
*/
iovcnt = fsoptions->uio_iovcnt;
optcnt = iovcnt >> 1;
opt = malloc(sizeof (struct vfsopt) * optcnt,
M_MOUNT, M_WAITOK | M_ZERO);
/*
* Count the size of the buffer for options,
* allocate it, and fill in the vfsopt structures.
*/
cur = fsoptions->uio_iov;
len = fsoptions->uio_resid;
buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
optlist = malloc(sizeof (struct vfsoptlist), M_MOUNT, M_WAITOK);
optlist->opt = opt;
optlist->optbuf = buf;
optlist->optcnt = optcnt;
offset = i = 0;
cur = fsoptions->uio_iov;
while (i < optcnt) {
opt[i].name = buf + offset;
/* Ensure the name of an option is a string. */
if (opt[i].name[cur->iov_len - 1] != '\0') {
error = EINVAL;
goto bad;
}
offset += cur->iov_len;
cur++;
opt[i].len = cur->iov_len;
/*
* Prevent consumers from trying to
* read the value of a 0 length option
* by setting it to NULL.
*/
if (opt[i].len == 0)
opt[i].value = NULL;
else
opt[i].value = buf + offset;
offset += cur->iov_len;
cur++; i++;
}
if ((error = uiomove(buf, len, fsoptions)) != 0)
goto bad;
/*
* We need these two options before the others,
* and they are mandatory for any filesystem.
* Ensure they are NULL terminated as well.
*/
fstypelen = 0;
error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
if (error || fstype[fstypelen - 1] != '\0') {
error = EINVAL;
goto bad;
}
fspathlen = 0;
error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
if (error || fspath[fspathlen - 1] != '\0') {
error = EINVAL;
goto bad;
}
/*
* Be ultra-paranoid about making sure the type and fspath
* variables will fit in our mp buffers, including the
* terminating NUL.
*/
if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
error = ENAMETOOLONG;
goto bad;
}
if (usermount == 0) {
error = suser(td);
if (error)
goto bad;
}
/*
* Do not allow NFS export by non-root users.
*/
if (fsflags & MNT_EXPORTED) {
error = suser(td);
if (error)
goto bad;
}
/*
* Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
*/
if (suser(td))
fsflags |= MNT_NOSUID | MNT_NODEV;
/*
* Get vnode to be covered
*/
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
if ((error = namei(&nd)) != 0)
goto bad;
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
if (fsflags & MNT_UPDATE) {
if ((vp->v_flag & VROOT) == 0) {
vput(vp);
error = EINVAL;
goto bad;
}
mp = vp->v_mount;
flag = mp->mnt_flag;
kern_flag = mp->mnt_kern_flag;
/*
* We only allow the filesystem to be reloaded if it
* is currently mounted read-only.
*/
if ((fsflags & MNT_RELOAD) &&
((mp->mnt_flag & MNT_RDONLY) == 0)) {
vput(vp);
error = EOPNOTSUPP; /* Needs translation */
goto bad;
}
/*
* Only root, or the user that did the original mount is
* permitted to update it.
*/
if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
error = suser(td);
if (error) {
vput(vp);
goto bad;
}
}
if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
vput(vp);
error = EBUSY;
goto bad;
}
mtx_lock(&vp->v_interlock);
if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
mtx_unlock(&vp->v_interlock);
vfs_unbusy(mp, td);
vput(vp);
error = EBUSY;
goto bad;
}
vp->v_flag |= VMOUNT;
mtx_unlock(&vp->v_interlock);
mp->mnt_flag |= fsflags &
(MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
VOP_UNLOCK(vp, 0, td);
mp->mnt_optnew = optlist;
goto update;
}
/*
* If the user is not root, ensure that they own the directory
* onto which we are attempting to mount.
*/
error = VOP_GETATTR(vp, &va, td->td_ucred, td);
if (error) {
vput(vp);
goto bad;
}
if (va.va_uid != td->td_ucred->cr_uid) {
error = suser(td);
if (error) {
vput(vp);
goto bad;
}
}
if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
vput(vp);
goto bad;
}
if (vp->v_type != VDIR) {
vput(vp);
error = ENOTDIR;
goto bad;
}
for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
if (!strcmp(vfsp->vfc_name, fstype))
break;
if (vfsp == NULL) {
/* Only load modules for root (very important!). */
error = suser(td);
if (error) {
vput(vp);
goto bad;
}
error = securelevel_gt(td->td_ucred, 0);
if (error) {
vput(vp);
goto bad;
}
error = linker_load_file(fstype, &lf);
if (error || lf == NULL) {
vput(vp);
if (lf == NULL)
error = ENODEV;
goto bad;
}
lf->userrefs++;
/* Look up again to see if the VFS was loaded. */
for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
if (!strcmp(vfsp->vfc_name, fstype))
break;
if (vfsp == NULL) {
lf->userrefs--;
linker_file_unload(lf);
vput(vp);
error = ENODEV;
goto bad;
}
}
mtx_lock(&vp->v_interlock);
if ((vp->v_flag & VMOUNT) != 0 ||
vp->v_mountedhere != NULL) {
mtx_unlock(&vp->v_interlock);
vput(vp);
error = EBUSY;
goto bad;
}
vp->v_flag |= VMOUNT;
mtx_unlock(&vp->v_interlock);
/*
* Allocate and initialize the filesystem.
*/
mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
TAILQ_INIT(&mp->mnt_nvnodelist);
TAILQ_INIT(&mp->mnt_reservedvnlist);
lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
(void)vfs_busy(mp, LK_NOWAIT, 0, td);
mp->mnt_op = vfsp->vfc_vfsops;
mp->mnt_vfc = vfsp;
vfsp->vfc_refcount++;
mp->mnt_stat.f_type = vfsp->vfc_typenum;
mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
mp->mnt_vnodecovered = vp;
mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
mp->mnt_iosize_max = DFLTPHYS;
VOP_UNLOCK(vp, 0, td);
mp->mnt_opt = optlist;
update:
/*
* Check if the fs implements the new VFS_NMOUNT()
* function, since the new system call was used.
*/
if (mp->mnt_op->vfs_mount != NULL) {
printf("%s doesn't support the new mount syscall\n",
mp->mnt_vfc->vfc_name);
mtx_lock(&vp->v_interlock);
vp->v_flag &= ~VMOUNT;
mtx_unlock(&vp->v_interlock);
if (mp->mnt_flag & MNT_UPDATE)
vfs_unbusy(mp, td);
else {
mp->mnt_vfc->vfc_refcount--;
vfs_unbusy(mp, td);
free((caddr_t)mp, M_MOUNT);
}
vput(vp);
error = EOPNOTSUPP;
goto bad;
}
/*
* Set the mount level flags.
*/
if (fsflags & MNT_RDONLY)
mp->mnt_flag |= MNT_RDONLY;
else if (mp->mnt_flag & MNT_RDONLY)
mp->mnt_kern_flag |= MNTK_WANTRDWR;
mp->mnt_flag &=~ MNT_UPDATEMASK;
mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
/*
* Mount the filesystem.
* XXX The final recipients of VFS_MOUNT just overwrite the ndp they
* get. No freeing of cn_pnbuf.
*/
error = VFS_NMOUNT(mp, &nd, td);
if (mp->mnt_flag & MNT_UPDATE) {
if (mp->mnt_kern_flag & MNTK_WANTRDWR)
mp->mnt_flag &= ~MNT_RDONLY;
mp->mnt_flag &=~
(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
if (error) {
mp->mnt_flag = flag;
mp->mnt_kern_flag = kern_flag;
vfs_freeopts(mp->mnt_optnew);
} else {
vfs_freeopts(mp->mnt_opt);
mp->mnt_opt = mp->mnt_optnew;
}
if ((mp->mnt_flag & MNT_RDONLY) == 0) {
if (mp->mnt_syncer == NULL)
error = vfs_allocate_syncvnode(mp);
} else {
if (mp->mnt_syncer != NULL)
vrele(mp->mnt_syncer);
mp->mnt_syncer = NULL;
}
vfs_unbusy(mp, td);
mtx_lock(&vp->v_interlock);
vp->v_flag &= ~VMOUNT;
mtx_unlock(&vp->v_interlock);
vrele(vp);
return (error);
}
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
/*
* Put the new filesystem on the mount list after root.
*/
cache_purge(vp);
if (!error) {
struct vnode *newdp;
mtx_lock(&vp->v_interlock);
vp->v_flag &= ~VMOUNT;
vp->v_mountedhere = mp;
mtx_unlock(&vp->v_interlock);
mtx_lock(&mountlist_mtx);
TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
mtx_unlock(&mountlist_mtx);
if (VFS_ROOT(mp, &newdp))
panic("mount: lost mount");
checkdirs(vp, newdp);
vput(newdp);
VOP_UNLOCK(vp, 0, td);
if ((mp->mnt_flag & MNT_RDONLY) == 0)
error = vfs_allocate_syncvnode(mp);
vfs_unbusy(mp, td);
if ((error = VFS_START(mp, 0, td)) != 0) {
vrele(vp);
goto bad;
}
} else {
mtx_lock(&vp->v_interlock);
vp->v_flag &= ~VMOUNT;
mtx_unlock(&vp->v_interlock);
mp->mnt_vfc->vfc_refcount--;
vfs_unbusy(mp, td);
free((caddr_t)mp, M_MOUNT);
vput(vp);
goto bad;
}
return (0);
bad:
vfs_freeopts(optlist);
return (error);
}
/*
* Old Mount API.
*/
#ifndef _SYS_SYSPROTO_H_
struct mount_args {
char *type;
char *path;
int flags;
caddr_t data;
};
#endif
/* ARGSUSED */
int
mount(td, uap)
struct thread *td;
struct mount_args /* {
syscallarg(char *) type;
syscallarg(char *) path;
syscallarg(int) flags;
syscallarg(caddr_t) data;
} */ *uap;
{
char *fstype;
char *fspath;
int error;
fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
/*
* vfs_mount() actually takes a kernel string for `type' and
* `path' now, so extract them.
*/
error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
if (error)
goto finish;
error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
if (error)
goto finish;
error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
SCARG(uap, data));
finish:
free(fstype, M_TEMP);
free(fspath, M_TEMP);
return (error);
}
/*
* vfs_mount(): actually attempt a filesystem mount.
*
* This routine is designed to be a "generic" entry point for routines
* that wish to mount a filesystem. All parameters except `fsdata' are
* pointers into kernel space. `fsdata' is currently still a pointer
* into userspace.
*/
int
vfs_mount(td, fstype, fspath, fsflags, fsdata)
struct thread *td;
const char *fstype;
char *fspath;
int fsflags;
void *fsdata;
{
linker_file_t lf;
struct vnode *vp;
struct mount *mp;
struct vfsconf *vfsp;
int error, flag = 0, kern_flag = 0;
struct vattr va;
struct nameidata nd;
/*
* Be ultra-paranoid about making sure the type and fspath
* variables will fit in our mp buffers, including the
* terminating NUL.
*/
if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
return (ENAMETOOLONG);
if (usermount == 0) {
error = suser(td);
if (error)
return (error);
}
/*
* Do not allow NFS export by non-root users.
*/
if (fsflags & MNT_EXPORTED) {
error = suser(td);
if (error)
return (error);
}
/*
* Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
*/
if (suser(td))
fsflags |= MNT_NOSUID | MNT_NODEV;
/*
* Get vnode to be covered
*/
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
if (fsflags & MNT_UPDATE) {
if ((vp->v_flag & VROOT) == 0) {
vput(vp);
return (EINVAL);
}
mp = vp->v_mount;
flag = mp->mnt_flag;
kern_flag = mp->mnt_kern_flag;
/*
* We only allow the filesystem to be reloaded if it
* is currently mounted read-only.
*/
if ((fsflags & MNT_RELOAD) &&
((mp->mnt_flag & MNT_RDONLY) == 0)) {
vput(vp);
return (EOPNOTSUPP); /* Needs translation */
}
/*
* Only root, or the user that did the original mount is
* permitted to update it.
*/
if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
error = suser(td);
if (error) {
vput(vp);
return (error);
}
}
if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
vput(vp);
return (EBUSY);
}
mtx_lock(&vp->v_interlock);
if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
mtx_unlock(&vp->v_interlock);
vfs_unbusy(mp, td);
vput(vp);
return (EBUSY);
}
vp->v_flag |= VMOUNT;
mtx_unlock(&vp->v_interlock);
mp->mnt_flag |= fsflags &
(MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
VOP_UNLOCK(vp, 0, td);
goto update;
}
/*
* If the user is not root, ensure that they own the directory
* onto which we are attempting to mount.
*/
error = VOP_GETATTR(vp, &va, td->td_ucred, td);
if (error) {
vput(vp);
return (error);
}
if (va.va_uid != td->td_ucred->cr_uid) {
error = suser(td);
if (error) {
vput(vp);
return (error);
}
}
if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
vput(vp);
return (error);
}
if (vp->v_type != VDIR) {
vput(vp);
return (ENOTDIR);
}
for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
if (!strcmp(vfsp->vfc_name, fstype))
break;
if (vfsp == NULL) {
/* Only load modules for root (very important!). */
error = suser(td);
if (error) {
vput(vp);
return (error);
}
error = securelevel_gt(td->td_ucred, 0);
if (error) {
vput(vp);
return (error);
}
error = linker_load_file(fstype, &lf);
if (error || lf == NULL) {
vput(vp);
if (lf == NULL)
error = ENODEV;
return (error);
}
lf->userrefs++;
/* Look up again to see if the VFS was loaded. */
for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
if (!strcmp(vfsp->vfc_name, fstype))
break;
if (vfsp == NULL) {
lf->userrefs--;
linker_file_unload(lf);
vput(vp);
return (ENODEV);
}
}
mtx_lock(&vp->v_interlock);
if ((vp->v_flag & VMOUNT) != 0 ||
vp->v_mountedhere != NULL) {
mtx_unlock(&vp->v_interlock);
vput(vp);
return (EBUSY);
}
vp->v_flag |= VMOUNT;
mtx_unlock(&vp->v_interlock);
/*
* Allocate and initialize the filesystem.
*/
mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
TAILQ_INIT(&mp->mnt_nvnodelist);
TAILQ_INIT(&mp->mnt_reservedvnlist);
lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
(void)vfs_busy(mp, LK_NOWAIT, 0, td);
mp->mnt_op = vfsp->vfc_vfsops;
mp->mnt_vfc = vfsp;
vfsp->vfc_refcount++;
mp->mnt_stat.f_type = vfsp->vfc_typenum;
mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
mp->mnt_vnodecovered = vp;
mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
mp->mnt_iosize_max = DFLTPHYS;
VOP_UNLOCK(vp, 0, td);
update:
/*
* Check if the fs implements the old VFS_MOUNT()
* function, since the old system call was used.
*/
if (mp->mnt_op->vfs_mount == NULL) {
printf("%s doesn't support the old mount syscall\n",
mp->mnt_vfc->vfc_name);
mtx_lock(&vp->v_interlock);
vp->v_flag &= ~VMOUNT;
mtx_unlock(&vp->v_interlock);
if (mp->mnt_flag & MNT_UPDATE)
vfs_unbusy(mp, td);
else {
mp->mnt_vfc->vfc_refcount--;
vfs_unbusy(mp, td);
free((caddr_t)mp, M_MOUNT);
}
vput(vp);
return (EOPNOTSUPP);
}
/*
* Set the mount level flags.
*/
if (fsflags & MNT_RDONLY)
mp->mnt_flag |= MNT_RDONLY;
else if (mp->mnt_flag & MNT_RDONLY)
mp->mnt_kern_flag |= MNTK_WANTRDWR;
mp->mnt_flag &=~ MNT_UPDATEMASK;
mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
/*
* Mount the filesystem.
* XXX The final recipients of VFS_MOUNT just overwrite the ndp they
* get. No freeing of cn_pnbuf.
*/
error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
if (mp->mnt_flag & MNT_UPDATE) {
if (mp->mnt_kern_flag & MNTK_WANTRDWR)
mp->mnt_flag &= ~MNT_RDONLY;
mp->mnt_flag &=~
(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
if (error) {
mp->mnt_flag = flag;
mp->mnt_kern_flag = kern_flag;
}
if ((mp->mnt_flag & MNT_RDONLY) == 0) {
if (mp->mnt_syncer == NULL)
error = vfs_allocate_syncvnode(mp);
} else {
if (mp->mnt_syncer != NULL)
vrele(mp->mnt_syncer);
mp->mnt_syncer = NULL;
}
vfs_unbusy(mp, td);
mtx_lock(&vp->v_interlock);
vp->v_flag &= ~VMOUNT;
mtx_unlock(&vp->v_interlock);
vrele(vp);
return (error);
}
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
/*
* Put the new filesystem on the mount list after root.
*/
cache_purge(vp);
if (!error) {
struct vnode *newdp;
mtx_lock(&vp->v_interlock);
vp->v_flag &= ~VMOUNT;
vp->v_mountedhere = mp;
mtx_unlock(&vp->v_interlock);
mtx_lock(&mountlist_mtx);
TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
mtx_unlock(&mountlist_mtx);
if (VFS_ROOT(mp, &newdp))
panic("mount: lost mount");
checkdirs(vp, newdp);
vput(newdp);
VOP_UNLOCK(vp, 0, td);
if ((mp->mnt_flag & MNT_RDONLY) == 0)
error = vfs_allocate_syncvnode(mp);
vfs_unbusy(mp, td);
if ((error = VFS_START(mp, 0, td)) != 0)
vrele(vp);
} else {
mtx_lock(&vp->v_interlock);
vp->v_flag &= ~VMOUNT;
mtx_unlock(&vp->v_interlock);
mp->mnt_vfc->vfc_refcount--;
vfs_unbusy(mp, td);
free((caddr_t)mp, M_MOUNT);
vput(vp);
}
return (error);
}
/*
* Scan all active processes to see if any of them have a current
* or root directory of `olddp'. If so, replace them with the new
* mount point.
*/
static void
checkdirs(olddp, newdp)
struct vnode *olddp, *newdp;
{
struct filedesc *fdp;
struct proc *p;
int nrele;
if (olddp->v_usecount == 1)
return;
sx_slock(&allproc_lock);
LIST_FOREACH(p, &allproc, p_list) {
PROC_LOCK(p);
fdp = p->p_fd;
if (fdp == NULL) {
PROC_UNLOCK(p);
continue;
}
nrele = 0;
FILEDESC_LOCK(fdp);
if (fdp->fd_cdir == olddp) {
VREF(newdp);
fdp->fd_cdir = newdp;
nrele++;
}
if (fdp->fd_rdir == olddp) {
VREF(newdp);
fdp->fd_rdir = newdp;
nrele++;
}
FILEDESC_UNLOCK(fdp);
PROC_UNLOCK(p);
while (nrele--)
vrele(olddp);
}
sx_sunlock(&allproc_lock);
if (rootvnode == olddp) {
vrele(rootvnode);
VREF(newdp);
rootvnode = newdp;
}
}
/*
* Unmount a file system.
*
* Note: unmount takes a path to the vnode mounted on as argument,
* not special file (as before).
*/
#ifndef _SYS_SYSPROTO_H_
struct unmount_args {
char *path;
int flags;
};
#endif
/* ARGSUSED */
int
unmount(td, uap)
struct thread *td;
register struct unmount_args /* {
syscallarg(char *) path;
syscallarg(int) flags;
} */ *uap;
{
register struct vnode *vp;
struct mount *mp;
int error;
struct nameidata nd;
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
NDFREE(&nd, NDF_ONLY_PNBUF);
mp = vp->v_mount;
/*
* Only root, or the user that did the original mount is
* permitted to unmount this filesystem.
*/
if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
error = suser(td);
if (error) {
vput(vp);
return (error);
}
}
/*
* Don't allow unmounting the root file system.
*/
if (mp->mnt_flag & MNT_ROOTFS) {
vput(vp);
return (EINVAL);
}
/*
* Must be the root of the filesystem
*/
if ((vp->v_flag & VROOT) == 0) {
vput(vp);
return (EINVAL);
}
vput(vp);
return (dounmount(mp, SCARG(uap, flags), td));
}
/*
* Do the actual file system unmount.
*/
int
dounmount(mp, flags, td)
struct mount *mp;
int flags;
struct thread *td;
{
struct vnode *coveredvp, *fsrootvp;
int error;
int async_flag;
mtx_lock(&mountlist_mtx);
mp->mnt_kern_flag |= MNTK_UNMOUNT;
error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
if (error) {
mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
if (mp->mnt_kern_flag & MNTK_MWAIT)
wakeup((caddr_t)mp);
return (error);
}
vn_start_write(NULL, &mp, V_WAIT);
if (mp->mnt_flag & MNT_EXPUBLIC)
vfs_setpublicfs(NULL, NULL, NULL);
vfs_msync(mp, MNT_WAIT);
async_flag = mp->mnt_flag & MNT_ASYNC;
mp->mnt_flag &=~ MNT_ASYNC;
cache_purgevfs(mp); /* remove cache entries for this file sys */
if (mp->mnt_syncer != NULL)
vrele(mp->mnt_syncer);
/* Move process cdir/rdir refs on fs root to underlying vnode. */
if (VFS_ROOT(mp, &fsrootvp) == 0) {
if (mp->mnt_vnodecovered != NULL)
checkdirs(fsrootvp, mp->mnt_vnodecovered);
if (fsrootvp == rootvnode) {
vrele(rootvnode);
rootvnode = NULL;
}
vput(fsrootvp);
}
if (((mp->mnt_flag & MNT_RDONLY) ||
(error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
(flags & MNT_FORCE)) {
error = VFS_UNMOUNT(mp, flags, td);
}
vn_finished_write(mp);
if (error) {
/* Undo cdir/rdir and rootvnode changes made above. */
if (VFS_ROOT(mp, &fsrootvp) == 0) {
if (mp->mnt_vnodecovered != NULL)
checkdirs(mp->mnt_vnodecovered, fsrootvp);
if (rootvnode == NULL) {
rootvnode = fsrootvp;
vref(rootvnode);
}
vput(fsrootvp);
}
if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
(void) vfs_allocate_syncvnode(mp);
mtx_lock(&mountlist_mtx);
mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
mp->mnt_flag |= async_flag;
lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
&mountlist_mtx, td);
if (mp->mnt_kern_flag & MNTK_MWAIT)
wakeup((caddr_t)mp);
return (error);
}
mtx_lock(&mountlist_mtx);
TAILQ_REMOVE(&mountlist, mp, mnt_list);
if ((coveredvp = mp->mnt_vnodecovered) != NULL)
coveredvp->v_mountedhere = NULL;
mp->mnt_vfc->vfc_refcount--;
if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
panic("unmount: dangling vnode");
lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
lockdestroy(&mp->mnt_lock);
if (coveredvp != NULL)
vrele(coveredvp);
if (mp->mnt_kern_flag & MNTK_MWAIT)
wakeup((caddr_t)mp);
if (mp->mnt_op->vfs_mount == NULL)
vfs_freeopts(mp->mnt_opt);
free((caddr_t)mp, M_MOUNT);
return (0);
}
/*
* Sync each mounted filesystem.
*/
#ifndef _SYS_SYSPROTO_H_
struct sync_args {
int dummy;
};
#endif
#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif
/* ARGSUSED */
int
sync(td, uap)
struct thread *td;
struct sync_args *uap;
{
struct mount *mp, *nmp;
int asyncflag;
mtx_lock(&mountlist_mtx);
for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
nmp = TAILQ_NEXT(mp, mnt_list);
continue;
}
if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
asyncflag = mp->mnt_flag & MNT_ASYNC;
mp->mnt_flag &= ~MNT_ASYNC;
vfs_msync(mp, MNT_NOWAIT);
VFS_SYNC(mp, MNT_NOWAIT,
((td != NULL) ? td->td_ucred : NOCRED), td);
mp->mnt_flag |= asyncflag;
vn_finished_write(mp);
}
mtx_lock(&mountlist_mtx);
nmp = TAILQ_NEXT(mp, mnt_list);
vfs_unbusy(mp, td);
}
mtx_unlock(&mountlist_mtx);
#if 0
/*
* XXX don't call vfs_bufstats() yet because that routine
* was not imported in the Lite2 merge.
*/
#ifdef DIAGNOSTIC
if (syncprt)
vfs_bufstats();
#endif /* DIAGNOSTIC */
#endif
return (0);
}
/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif
/*
* Change filesystem quotas.
*/
#ifndef _SYS_SYSPROTO_H_
struct quotactl_args {
char *path;
int cmd;
int uid;
caddr_t arg;
};
#endif
/* ARGSUSED */
int
quotactl(td, uap)
struct thread *td;
register struct quotactl_args /* {
syscallarg(char *) path;
syscallarg(int) cmd;
syscallarg(int) uid;
syscallarg(caddr_t) arg;
} */ *uap;
{
struct mount *mp;
int error;
struct nameidata nd;
if (jailed(td->td_ucred) && !prison_quotas)
return (EPERM);
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
vrele(nd.ni_vp);
if (error)
return (error);
error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
SCARG(uap, arg), td);
vn_finished_write(mp);
return (error);
}
/*
* Get filesystem statistics.
*/
#ifndef _SYS_SYSPROTO_H_
struct statfs_args {
char *path;
struct statfs *buf;
};
#endif
/* ARGSUSED */
int
statfs(td, uap)
struct thread *td;
register struct statfs_args /* {
syscallarg(char *) path;
syscallarg(struct statfs *) buf;
} */ *uap;
{
register struct mount *mp;
register struct statfs *sp;
int error;
struct nameidata nd;
struct statfs sb;
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
mp = nd.ni_vp->v_mount;
sp = &mp->mnt_stat;
NDFREE(&nd, NDF_ONLY_PNBUF);
vrele(nd.ni_vp);
error = VFS_STATFS(mp, sp, td);
if (error)
return (error);
sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
if (suser(td)) {
bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
sp = &sb;
}
return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
}
/*
* Get filesystem statistics.
*/
#ifndef _SYS_SYSPROTO_H_
struct fstatfs_args {
int fd;
struct statfs *buf;
};
#endif
/* ARGSUSED */
int
fstatfs(td, uap)
struct thread *td;
register struct fstatfs_args /* {
syscallarg(int) fd;
syscallarg(struct statfs *) buf;
} */ *uap;
{
struct file *fp;
struct mount *mp;
register struct statfs *sp;
int error;
struct statfs sb;
if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
mp = ((struct vnode *)fp->f_data)->v_mount;
fdrop(fp, td);
if (mp == NULL)
return (EBADF);
sp = &mp->mnt_stat;
error = VFS_STATFS(mp, sp, td);
if (error)
return (error);
sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
if (suser(td)) {
bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
sp = &sb;
}
return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
}
/*
* Get statistics on all filesystems.
*/
#ifndef _SYS_SYSPROTO_H_
struct getfsstat_args {
struct statfs *buf;
long bufsize;
int flags;
};
#endif
int
getfsstat(td, uap)
struct thread *td;
register struct getfsstat_args /* {
syscallarg(struct statfs *) buf;
syscallarg(long) bufsize;
syscallarg(int) flags;
} */ *uap;
{
register struct mount *mp, *nmp;
register struct statfs *sp;
caddr_t sfsp;
long count, maxcount, error;
maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
sfsp = (caddr_t)SCARG(uap, buf);
count = 0;
mtx_lock(&mountlist_mtx);
for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
nmp = TAILQ_NEXT(mp, mnt_list);
continue;
}
if (sfsp && count < maxcount) {
sp = &mp->mnt_stat;
/*
* If MNT_NOWAIT or MNT_LAZY is specified, do not
* refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
* overrides MNT_WAIT.
*/
if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
(SCARG(uap, flags) & MNT_WAIT)) &&
(error = VFS_STATFS(mp, sp, td))) {
mtx_lock(&mountlist_mtx);
nmp = TAILQ_NEXT(mp, mnt_list);
vfs_unbusy(mp, td);
continue;
}
sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
if (error) {
vfs_unbusy(mp, td);
return (error);
}
sfsp += sizeof(*sp);
}
count++;
mtx_lock(&mountlist_mtx);
nmp = TAILQ_NEXT(mp, mnt_list);
vfs_unbusy(mp, td);
}
mtx_unlock(&mountlist_mtx);
if (sfsp && count > maxcount)
td->td_retval[0] = maxcount;
else
td->td_retval[0] = count;
return (0);
}
/*
* Change current working directory to a given file descriptor.
*/
#ifndef _SYS_SYSPROTO_H_
struct fchdir_args {
int fd;
};
#endif
/* ARGSUSED */
int
fchdir(td, uap)
struct thread *td;
struct fchdir_args /* {
syscallarg(int) fd;
} */ *uap;
{
register struct filedesc *fdp = td->td_proc->p_fd;
struct vnode *vp, *tdp, *vpold;
struct mount *mp;
struct file *fp;
int error;
if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
return (error);
vp = (struct vnode *)fp->f_data;
VREF(vp);
fdrop(fp, td);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
if (vp->v_type != VDIR)
error = ENOTDIR;
else
error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
while (!error && (mp = vp->v_mountedhere) != NULL) {
if (vfs_busy(mp, 0, 0, td))
continue;
error = VFS_ROOT(mp, &tdp);
vfs_unbusy(mp, td);
if (error)
break;
vput(vp);
vp = tdp;
}
if (error) {
vput(vp);
return (error);
}
VOP_UNLOCK(vp, 0, td);
FILEDESC_LOCK(fdp);
vpold = fdp->fd_cdir;
fdp->fd_cdir = vp;
FILEDESC_UNLOCK(fdp);
vrele(vpold);
return (0);
}
/*
* Change current working directory (``.'').
*/
#ifndef _SYS_SYSPROTO_H_
struct chdir_args {
char *path;
};
#endif
/* ARGSUSED */
int
chdir(td, uap)
struct thread *td;
struct chdir_args /* {
syscallarg(char *) path;
} */ *uap;
{
register struct filedesc *fdp = td->td_proc->p_fd;
int error;
struct nameidata nd;
struct vnode *vp;
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
SCARG(uap, path), td);
if ((error = change_dir(&nd, td)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
FILEDESC_LOCK(fdp);
vp = fdp->fd_cdir;
fdp->fd_cdir = nd.ni_vp;
FILEDESC_UNLOCK(fdp);
vrele(vp);
return (0);
}
/*
* Helper function for raised chroot(2) security function: Refuse if
* any filedescriptors are open directories.
*/
static int
chroot_refuse_vdir_fds(fdp)
struct filedesc *fdp;
{
struct vnode *vp;
struct file *fp;
int fd;
FILEDESC_LOCK(fdp);
for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
fp = fget_locked(fdp, fd);
if (fp == NULL)
continue;
if (fp->f_type == DTYPE_VNODE) {
vp = (struct vnode *)fp->f_data;
if (vp->v_type == VDIR) {
FILEDESC_UNLOCK(fdp);
return (EPERM);
}
}
}
FILEDESC_UNLOCK(fdp);
return (0);
}
/*
* This sysctl determines if we will allow a process to chroot(2) if it
* has a directory open:
* 0: disallowed for all processes.
* 1: allowed for processes that were not already chroot(2)'ed.
* 2: allowed for all processes.
*/
static int chroot_allow_open_directories = 1;
SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
&chroot_allow_open_directories, 0, "");
/*
* Change notion of root (``/'') directory.
*/
#ifndef _SYS_SYSPROTO_H_
struct chroot_args {
char *path;
};
#endif
/* ARGSUSED */
int
chroot(td, uap)
struct thread *td;
struct chroot_args /* {
syscallarg(char *) path;
} */ *uap;
{
register struct filedesc *fdp = td->td_proc->p_fd;
int error;
struct nameidata nd;
struct vnode *vp;
error = suser_cred(td->td_ucred, PRISON_ROOT);
if (error)
return (error);
FILEDESC_LOCK(fdp);
if (chroot_allow_open_directories == 0 ||
(chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
FILEDESC_UNLOCK(fdp);
error = chroot_refuse_vdir_fds(fdp);
} else
FILEDESC_UNLOCK(fdp);
if (error)
return (error);
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
SCARG(uap, path), td);
if ((error = change_dir(&nd, td)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
FILEDESC_LOCK(fdp);
vp = fdp->fd_rdir;
fdp->fd_rdir = nd.ni_vp;
if (!fdp->fd_jdir) {
fdp->fd_jdir = nd.ni_vp;
VREF(fdp->fd_jdir);
}
FILEDESC_UNLOCK(fdp);
vrele(vp);
return (0);
}
/*
* Common routine for chroot and chdir.
*/
static int
change_dir(ndp, td)
register struct nameidata *ndp;
struct thread *td;
{
struct vnode *vp;
int error;
error = namei(ndp);
if (error)
return (error);
vp = ndp->ni_vp;
if (vp->v_type != VDIR)
error = ENOTDIR;
else
error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
if (error)
vput(vp);
else
VOP_UNLOCK(vp, 0, td);
return (error);
}
/*
* Check permissions, allocate an open file structure,
* and call the device open routine if any.
*/
#ifndef _SYS_SYSPROTO_H_
struct open_args {
char *path;
int flags;
int mode;
};
#endif
int
open(td, uap)
struct thread *td;
register struct open_args /* {
syscallarg(char *) path;
syscallarg(int) flags;
syscallarg(int) mode;
} */ *uap;
{
struct proc *p = td->td_proc;
struct filedesc *fdp = p->p_fd;
struct file *fp;
struct vnode *vp;
struct vattr vat;
struct mount *mp;
int cmode, flags, oflags;
struct file *nfp;
int type, indx, error;
struct flock lf;
struct nameidata nd;
oflags = SCARG(uap, flags);
if ((oflags & O_ACCMODE) == O_ACCMODE)
return (EINVAL);
flags = FFLAGS(oflags);
error = falloc(td, &nfp, &indx);
if (error)
return (error);
fp = nfp;
FILEDESC_LOCK(fdp);
cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
FILEDESC_UNLOCK(fdp);
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
td->td_dupfd = -indx - 1; /* XXX check for fdopen */
/*
* Bump the ref count to prevent another process from closing
* the descriptor while we are blocked in vn_open()
*/
fhold(fp);
error = vn_open(&nd, &flags, cmode);
if (error) {
/*
* release our own reference
*/
fdrop(fp, td);
/*
* handle special fdopen() case. bleh. dupfdopen() is
* responsible for dropping the old contents of ofiles[indx]
* if it succeeds.
*/
if ((error == ENODEV || error == ENXIO) &&
td->td_dupfd >= 0 && /* XXX from fdopen */
(error =
dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
td->td_retval[0] = indx;
return (0);
}
/*
* Clean up the descriptor, but only if another thread hadn't
* replaced or closed it.
*/
FILEDESC_LOCK(fdp);
if (fdp->fd_ofiles[indx] == fp) {
fdp->fd_ofiles[indx] = NULL;
FILEDESC_UNLOCK(fdp);
fdrop(fp, td);
} else
FILEDESC_UNLOCK(fdp);
if (error == ERESTART)
error = EINTR;
return (error);
}
td->td_dupfd = 0;
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
/*
* There should be 2 references on the file, one from the descriptor
* table, and one for us.
*
* Handle the case where someone closed the file (via its file
* descriptor) while we were blocked. The end result should look
* like opening the file succeeded but it was immediately closed.
*/
FILEDESC_LOCK(fdp);
FILE_LOCK(fp);
if (fp->f_count == 1) {
KASSERT(fdp->fd_ofiles[indx] != fp,
("Open file descriptor lost all refs"));
FILEDESC_UNLOCK(fdp);
FILE_UNLOCK(fp);
VOP_UNLOCK(vp, 0, td);
vn_close(vp, flags & FMASK, fp->f_cred, td);
fdrop(fp, td);
td->td_retval[0] = indx;
return 0;
}
fp->f_data = (caddr_t)vp;
fp->f_flag = flags & FMASK;
fp->f_ops = &vnops;
fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
FILEDESC_UNLOCK(fdp);
FILE_UNLOCK(fp);
VOP_UNLOCK(vp, 0, td);
if (flags & (O_EXLOCK | O_SHLOCK)) {
lf.l_whence = SEEK_SET;
lf.l_start = 0;
lf.l_len = 0;
if (flags & O_EXLOCK)
lf.l_type = F_WRLCK;
else
lf.l_type = F_RDLCK;
type = F_FLOCK;
if ((flags & FNONBLOCK) == 0)
type |= F_WAIT;
if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
goto bad;
fp->f_flag |= FHASLOCK;
}
if (flags & O_TRUNC) {
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
goto bad;
VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
VATTR_NULL(&vat);
vat.va_size = 0;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
VOP_UNLOCK(vp, 0, td);
vn_finished_write(mp);
if (error)
goto bad;
}
/* assert that vn_open created a backing object if one is needed */
KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
("open: vmio vnode has no backing object after vn_open"));
/*
* Release our private reference, leaving the one associated with
* the descriptor table intact.
*/
fdrop(fp, td);
td->td_retval[0] = indx;
return (0);
bad:
FILEDESC_LOCK(fdp);
if (fdp->fd_ofiles[indx] == fp) {
fdp->fd_ofiles[indx] = NULL;
FILEDESC_UNLOCK(fdp);
fdrop(fp, td);
} else
FILEDESC_UNLOCK(fdp);
return (error);
}
#ifdef COMPAT_43
/*
* Create a file.
*/
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
char *path;
int mode;
};
#endif
int
ocreat(td, uap)
struct thread *td;
register struct ocreat_args /* {
syscallarg(char *) path;
syscallarg(int) mode;
} */ *uap;
{
struct open_args /* {
syscallarg(char *) path;
syscallarg(int) flags;
syscallarg(int) mode;
} */ nuap;
SCARG(&nuap, path) = SCARG(uap, path);
SCARG(&nuap, mode) = SCARG(uap, mode);
SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
return (open(td, &nuap));
}
#endif /* COMPAT_43 */
/*
* Create a special file.
*/
#ifndef _SYS_SYSPROTO_H_
struct mknod_args {
char *path;
int mode;
int dev;
};
#endif
/* ARGSUSED */
int
mknod(td, uap)
struct thread *td;
register struct mknod_args /* {
syscallarg(char *) path;
syscallarg(int) mode;
syscallarg(int) dev;
} */ *uap;
{
struct vnode *vp;
struct mount *mp;
struct vattr vattr;
int error;
int whiteout = 0;
struct nameidata nd;
switch (SCARG(uap, mode) & S_IFMT) {
case S_IFCHR:
case S_IFBLK:
error = suser(td);
break;
default:
error = suser_cred(td->td_ucred, PRISON_ROOT);
break;
}
if (error)
return (error);
restart:
bwillwrite();
NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
if (vp != NULL) {
vrele(vp);
error = EEXIST;
} else {
VATTR_NULL(&vattr);
FILEDESC_LOCK(td->td_proc->p_fd);
vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
FILEDESC_UNLOCK(td->td_proc->p_fd);
vattr.va_rdev = SCARG(uap, dev);
whiteout = 0;
switch (SCARG(uap, mode) & S_IFMT) {
case S_IFMT: /* used by badsect to flag bad sectors */
vattr.va_type = VBAD;
break;
case S_IFCHR:
vattr.va_type = VCHR;
break;
case S_IFBLK:
vattr.va_type = VBLK;
break;
case S_IFWHT:
whiteout = 1;
break;
default:
error = EINVAL;
break;
}
}
if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
return (error);
goto restart;
}
if (!error) {
VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
if (whiteout)
error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
else {
error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
&nd.ni_cnd, &vattr);
if (error == 0)
vput(nd.ni_vp);
}
}
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
return (error);
}
/*
* Create a named pipe.
*/
#ifndef _SYS_SYSPROTO_H_
struct mkfifo_args {
char *path;
int mode;
};
#endif
/* ARGSUSED */
int
mkfifo(td, uap)
struct thread *td;
register struct mkfifo_args /* {
syscallarg(char *) path;
syscallarg(int) mode;
} */ *uap;
{
struct mount *mp;
struct vattr vattr;
int error;
struct nameidata nd;
restart:
bwillwrite();
NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
if (nd.ni_vp != NULL) {
NDFREE(&nd, NDF_ONLY_PNBUF);
vrele(nd.ni_vp);
vput(nd.ni_dvp);
return (EEXIST);
}
if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
return (error);
goto restart;
}
VATTR_NULL(&vattr);
vattr.va_type = VFIFO;
FILEDESC_LOCK(td->td_proc->p_fd);
vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
FILEDESC_UNLOCK(td->td_proc->p_fd);
VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
if (error == 0)
vput(nd.ni_vp);
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
vn_finished_write(mp);
return (error);
}
/*
* Make a hard file link.
*/
#ifndef _SYS_SYSPROTO_H_
struct link_args {
char *path;
char *link;
};
#endif
/* ARGSUSED */
int
link(td, uap)
struct thread *td;
register struct link_args /* {
syscallarg(char *) path;
syscallarg(char *) link;
} */ *uap;
{
struct vnode *vp;
struct mount *mp;
struct nameidata nd;
int error;
bwillwrite();
NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
if (vp->v_type == VDIR) {
vrele(vp);
return (EPERM); /* POSIX */
}
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
vrele(vp);
return (error);
}
NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
if ((error = namei(&nd)) == 0) {
if (nd.ni_vp != NULL) {
vrele(nd.ni_vp);
error = EEXIST;
} else {
VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
}
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
}
vrele(vp);
vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
return (error);
}
/*
* Make a symbolic link.
*/
#ifndef _SYS_SYSPROTO_H_
struct symlink_args {
char *path;
char *link;
};
#endif
/* ARGSUSED */
int
symlink(td, uap)
struct thread *td;
register struct symlink_args /* {
syscallarg(char *) path;
syscallarg(char *) link;
} */ *uap;
{
struct mount *mp;
struct vattr vattr;
char *path;
int error;
struct nameidata nd;
path = uma_zalloc(namei_zone, M_WAITOK);
if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
goto out;
restart:
bwillwrite();
NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
if ((error = namei(&nd)) != 0)
goto out;
if (nd.ni_vp) {
NDFREE(&nd, NDF_ONLY_PNBUF);
vrele(nd.ni_vp);
vput(nd.ni_dvp);
error = EEXIST;
goto out;
}
if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
return (error);
goto restart;
}
VATTR_NULL(&vattr);
FILEDESC_LOCK(td->td_proc->p_fd);
vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
FILEDESC_UNLOCK(td->td_proc->p_fd);
VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
NDFREE(&nd, NDF_ONLY_PNBUF);
if (error == 0)
vput(nd.ni_vp);
vput(nd.ni_dvp);
vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
out:
uma_zfree(namei_zone, path);
return (error);
}
/*
* Delete a whiteout from the filesystem.
*/
/* ARGSUSED */
int
undelete(td, uap)
struct thread *td;
register struct undelete_args /* {
syscallarg(char *) path;
} */ *uap;
{
int error;
struct mount *mp;
struct nameidata nd;
restart:
bwillwrite();
NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
SCARG(uap, path), td);
error = namei(&nd);
if (error)
return (error);
if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_vp)
vrele(nd.ni_vp);
vput(nd.ni_dvp);
return (EEXIST);
}
if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
return (error);
goto restart;
}
VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
return (error);
}
/*
* Delete a name from the filesystem.
*/
#ifndef _SYS_SYSPROTO_H_
struct unlink_args {
char *path;
};
#endif
/* ARGSUSED */
int
unlink(td, uap)
struct thread *td;
struct unlink_args /* {
syscallarg(char *) path;
} */ *uap;
{
struct mount *mp;
struct vnode *vp;
int error;
struct nameidata nd;
restart:
bwillwrite();
NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
if (vp->v_type == VDIR)
error = EPERM; /* POSIX */
else {
/*
* The root of a mounted filesystem cannot be deleted.
*
* XXX: can this only be a VDIR case?
*/
if (vp->v_flag & VROOT)
error = EBUSY;
}
if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
NDFREE(&nd, NDF_ONLY_PNBUF);
vrele(vp);
vput(nd.ni_dvp);
if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
return (error);
goto restart;
}
VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
if (!error) {
VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
}
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
vput(vp);
vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
return (error);
}
/*
* Reposition read/write file offset.
*/
#ifndef _SYS_SYSPROTO_H_
struct lseek_args {
int fd;
int pad;
off_t offset;
int whence;
};
#endif
int
lseek(td, uap)
struct thread *td;
register struct lseek_args /* {
syscallarg(int) fd;
syscallarg(int) pad;
syscallarg(off_t) offset;
syscallarg(int) whence;
} */ *uap;
{
struct ucred *cred = td->td_ucred;
struct file *fp;
struct vnode *vp;
struct vattr vattr;
off_t offset;
int error, noneg;
if ((error = fget(td, uap->fd, &fp)) != 0)
return (error);
if (fp->f_type != DTYPE_VNODE) {
fdrop(fp, td);
return (ESPIPE);
}
vp = (struct vnode *)fp->f_data;
noneg = (vp->v_type != VCHR);
offset = SCARG(uap, offset);
switch (SCARG(uap, whence)) {
case L_INCR:
if (noneg &&
(fp->f_offset < 0 ||
(offset > 0 && fp->f_offset > OFF_MAX - offset)))
return (EOVERFLOW);
offset += fp->f_offset;
break;
case L_XTND:
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
error = VOP_GETATTR(vp, &vattr, cred, td);
VOP_UNLOCK(vp, 0, td);
if (error)
return (error);
if (noneg &&
(vattr.va_size > OFF_MAX ||
(offset > 0 && vattr.va_size > OFF_MAX - offset)))
return (EOVERFLOW);
offset += vattr.va_size;
break;
case L_SET:
break;
default:
fdrop(fp, td);
return (EINVAL);
}
if (noneg && offset < 0)
return (EINVAL);
fp->f_offset = offset;
*(off_t *)(td->td_retval) = fp->f_offset;
fdrop(fp, td);
return (0);
}
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
* Reposition read/write file offset.
*/
#ifndef _SYS_SYSPROTO_H_
struct olseek_args {
int fd;
long offset;
int whence;
};
#endif
int
olseek(td, uap)
struct thread *td;
register struct olseek_args /* {
syscallarg(int) fd;
syscallarg(long) offset;
syscallarg(int) whence;
} */ *uap;
{
struct lseek_args /* {
syscallarg(int) fd;
syscallarg(int) pad;
syscallarg(off_t) offset;
syscallarg(int) whence;
} */ nuap;
int error;
SCARG(&nuap, fd) = SCARG(uap, fd);
SCARG(&nuap, offset) = SCARG(uap, offset);
SCARG(&nuap, whence) = SCARG(uap, whence);
error = lseek(td, &nuap);
return (error);
}
#endif /* COMPAT_43 */
/*
* Check access permissions using passed credentials.
*/
static int
vn_access(vp, user_flags, cred, td)
struct vnode *vp;
int user_flags;
struct ucred *cred;
struct thread *td;
{
int error, flags;
/* Flags == 0 means only check for existence. */
error = 0;
if (user_flags) {
flags = 0;
if (user_flags & R_OK)
flags |= VREAD;
if (user_flags & W_OK)
flags |= VWRITE;
if (user_flags & X_OK)
flags |= VEXEC;
if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
error = VOP_ACCESS(vp, flags, cred, td);
}
return (error);
}
/*
* Check access permissions using "real" credentials.
*/
#ifndef _SYS_SYSPROTO_H_
struct access_args {
char *path;
int flags;
};
#endif
int
access(td, uap)
struct thread *td;
register struct access_args /* {
syscallarg(char *) path;
syscallarg(int) flags;
} */ *uap;
{
struct ucred *cred, *tmpcred;
register struct vnode *vp;
int error;
struct nameidata nd;
/*
* Create and modify a temporary credential instead of one that
* is potentially shared. This could also mess up socket
* buffer accounting which can run in an interrupt context.
*
* XXX - Depending on how "threads" are finally implemented, it
* may be better to explicitly pass the credential to namei()
* rather than to modify the potentially shared process structure.
*/
cred = td->td_ucred;
tmpcred = crdup(cred);
tmpcred->cr_uid = cred->cr_ruid;
tmpcred->cr_groups[0] = cred->cr_rgid;
td->td_ucred = tmpcred;
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
goto out1;
vp = nd.ni_vp;
error = vn_access(vp, SCARG(uap, flags), tmpcred, td);
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(vp);
out1:
td->td_ucred = cred;
crfree(tmpcred);
return (error);
}
/*
* Check access permissions using "effective" credentials.
*/
#ifndef _SYS_SYSPROTO_H_
struct eaccess_args {
char *path;
int flags;
};
#endif
int
eaccess(td, uap)
struct thread *td;
register struct eaccess_args /* {
syscallarg(char *) path;
syscallarg(int) flags;
} */ *uap;
{
struct nameidata nd;
struct vnode *vp;
int error;
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td);
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(vp);
return (error);
}
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
* Get file status; this version follows links.
*/
#ifndef _SYS_SYSPROTO_H_
struct ostat_args {
char *path;
struct ostat *ub;
};
#endif
/* ARGSUSED */
int
ostat(td, uap)
struct thread *td;
register struct ostat_args /* {
syscallarg(char *) path;
syscallarg(struct ostat *) ub;
} */ *uap;
{
struct stat sb;
struct ostat osb;
int error;
struct nameidata nd;
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = vn_stat(nd.ni_vp, &sb, td);
vput(nd.ni_vp);
if (error)
return (error);
cvtstat(&sb, &osb);
error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
return (error);
}
/*
* Get file status; this version does not follow links.
*/
#ifndef _SYS_SYSPROTO_H_
struct olstat_args {
char *path;
struct ostat *ub;
};
#endif
/* ARGSUSED */
int
olstat(td, uap)
struct thread *td;
register struct olstat_args /* {
syscallarg(char *) path;
syscallarg(struct ostat *) ub;
} */ *uap;
{
struct vnode *vp;
struct stat sb;
struct ostat osb;
int error;
struct nameidata nd;
NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
error = vn_stat(vp, &sb, td);
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(vp);
if (error)
return (error);
cvtstat(&sb, &osb);
error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
return (error);
}
/*
* Convert from an old to a new stat structure.
*/
void
cvtstat(st, ost)
struct stat *st;
struct ostat *ost;
{
ost->st_dev = st->st_dev;
ost->st_ino = st->st_ino;
ost->st_mode = st->st_mode;
ost->st_nlink = st->st_nlink;
ost->st_uid = st->st_uid;
ost->st_gid = st->st_gid;
ost->st_rdev = st->st_rdev;
if (st->st_size < (quad_t)1 << 32)
ost->st_size = st->st_size;
else
ost->st_size = -2;
ost->st_atime = st->st_atime;
ost->st_mtime = st->st_mtime;
ost->st_ctime = st->st_ctime;
ost->st_blksize = st->st_blksize;
ost->st_blocks = st->st_blocks;
ost->st_flags = st->st_flags;
ost->st_gen = st->st_gen;
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
/*
* Get file status; this version follows links.
*/
#ifndef _SYS_SYSPROTO_H_
struct stat_args {
char *path;
struct stat *ub;
};
#endif
/* ARGSUSED */
int
stat(td, uap)
struct thread *td;
register struct stat_args /* {
syscallarg(char *) path;
syscallarg(struct stat *) ub;
} */ *uap;
{
struct stat sb;
int error;
struct nameidata nd;
#ifdef LOOKUP_SHARED
NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
UIO_USERSPACE, SCARG(uap, path), td);
#else
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
SCARG(uap, path), td);
#endif
if ((error = namei(&nd)) != 0)
return (error);
error = vn_stat(nd.ni_vp, &sb, td);
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_vp);
if (error)
return (error);
error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
return (error);
}
/*
* Get file status; this version does not follow links.
*/
#ifndef _SYS_SYSPROTO_H_
struct lstat_args {
char *path;
struct stat *ub;
};
#endif
/* ARGSUSED */
int
lstat(td, uap)
struct thread *td;
register struct lstat_args /* {
syscallarg(char *) path;
syscallarg(struct stat *) ub;
} */ *uap;
{
int error;
struct vnode *vp;
struct stat sb;
struct nameidata nd;
NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
error = vn_stat(vp, &sb, td);
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(vp);
if (error)
return (error);
error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
return (error);
}
/*
* Implementation of the NetBSD stat() function.
* XXX This should probably be collapsed with the FreeBSD version,
* as the differences are only due to vn_stat() clearing spares at
* the end of the structures. vn_stat could be split to avoid this,
* and thus collapse the following to close to zero code.
*/
void
cvtnstat(sb, nsb)
struct stat *sb;
struct nstat *nsb;
{
nsb->st_dev = sb->st_dev;
nsb->st_ino = sb->st_ino;
nsb->st_mode = sb->st_mode;
nsb->st_nlink = sb->st_nlink;
nsb->st_uid = sb->st_uid;
nsb->st_gid = sb->st_gid;
nsb->st_rdev = sb->st_rdev;
nsb->st_atimespec = sb->st_atimespec;
nsb->st_mtimespec = sb->st_mtimespec;
nsb->st_ctimespec = sb->st_ctimespec;
nsb->st_size = sb->st_size;
nsb->st_blocks = sb->st_blocks;
nsb->st_blksize = sb->st_blksize;
nsb->st_flags = sb->st_flags;
nsb->st_gen = sb->st_gen;
nsb->st_qspare[0] = sb->st_qspare[0];
nsb->st_qspare[1] = sb->st_qspare[1];
}
#ifndef _SYS_SYSPROTO_H_
struct nstat_args {
char *path;
struct nstat *ub;
};
#endif
/* ARGSUSED */
int
nstat(td, uap)
struct thread *td;
register struct nstat_args /* {
syscallarg(char *) path;
syscallarg(struct nstat *) ub;
} */ *uap;
{
struct stat sb;
struct nstat nsb;
int error;
struct nameidata nd;
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = vn_stat(nd.ni_vp, &sb, td);
vput(nd.ni_vp);
if (error)
return (error);
cvtnstat(&sb, &nsb);
error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
return (error);
}
/*
* NetBSD lstat. Get file status; this version does not follow links.
*/
#ifndef _SYS_SYSPROTO_H_
struct lstat_args {
char *path;
struct stat *ub;
};
#endif
/* ARGSUSED */
int
nlstat(td, uap)
struct thread *td;
register struct nlstat_args /* {
syscallarg(char *) path;
syscallarg(struct nstat *) ub;
} */ *uap;
{
int error;
struct vnode *vp;
struct stat sb;
struct nstat nsb;
struct nameidata nd;
NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
NDFREE(&nd, NDF_ONLY_PNBUF);
error = vn_stat(vp, &sb, td);
vput(vp);
if (error)
return (error);
cvtnstat(&sb, &nsb);
error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
return (error);
}
/*
* Get configurable pathname variables.
*/
#ifndef _SYS_SYSPROTO_H_
struct pathconf_args {
char *path;
int name;
};
#endif
/* ARGSUSED */
int
pathconf(td, uap)
struct thread *td;
register struct pathconf_args /* {
syscallarg(char *) path;
syscallarg(int) name;
} */ *uap;
{
int error;
struct nameidata nd;
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
vput(nd.ni_vp);
return (error);
}
/*
* Return target name of a symbolic link.
*/
#ifndef _SYS_SYSPROTO_H_
struct readlink_args {
char *path;
char *buf;
int count;
};
#endif
/* ARGSUSED */
int
readlink(td, uap)
struct thread *td;
register struct readlink_args /* {
syscallarg(char *) path;
syscallarg(char *) buf;
syscallarg(int) count;
} */ *uap;
{
register struct vnode *vp;
struct iovec aiov;
struct uio auio;
int error;
struct nameidata nd;
NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
if (vp->v_type != VLNK)
error = EINVAL;
else {
aiov.iov_base = SCARG(uap, buf);
aiov.iov_len = SCARG(uap, count);
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_offset = 0;
auio.uio_rw = UIO_READ;
auio.uio_segflg = UIO_USERSPACE;
auio.uio_td = td;
auio.uio_resid = SCARG(uap, count);
error = VOP_READLINK(vp, &auio, td->td_ucred);
}
vput(vp);
td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
return (error);
}
/*
* Common implementation code for chflags() and fchflags().
*/
static int
setfflags(td, vp, flags)
struct thread *td;
struct vnode *vp;
int flags;
{
int error;
struct mount *mp;
struct vattr vattr;
/*
* Prevent non-root users from setting flags on devices. When
* a device is reused, users can retain ownership of the device
* if they are allowed to set flags and programs assume that
* chown can't fail when done as root.
*/
if (vp->v_type == VCHR || vp->v_type == VBLK) {
error = suser_cred(td->td_ucred, PRISON_ROOT);
if (error)
return (error);
}
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
VATTR_NULL(&vattr);
vattr.va_flags = flags;
error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
VOP_UNLOCK(vp, 0, td);
vn_finished_write(mp);
return (error);
}
/*
* Change flags of a file given a path name.
*/
#ifndef _SYS_SYSPROTO_H_
struct chflags_args {
char *path;
int flags;
};
#endif
/* ARGSUSED */
int
chflags(td, uap)
struct thread *td;
register struct chflags_args /* {
syscallarg(char *) path;
syscallarg(int) flags;
} */ *uap;
{
int error;
struct nameidata nd;
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
vrele(nd.ni_vp);
return error;
}
/*
* Change flags of a file given a file descriptor.
*/
#ifndef _SYS_SYSPROTO_H_
struct fchflags_args {
int fd;
int flags;
};
#endif
/* ARGSUSED */
int
fchflags(td, uap)
struct thread *td;
register struct fchflags_args /* {
syscallarg(int) fd;
syscallarg(int) flags;
} */ *uap;
{
struct file *fp;
int error;
if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
fdrop(fp, td);
return (error);
}
/*
* Common implementation code for chmod(), lchmod() and fchmod().
*/
static int
setfmode(td, vp, mode)
struct thread *td;
struct vnode *vp;
int mode;
{
int error;
struct mount *mp;
struct vattr vattr;
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
VATTR_NULL(&vattr);
vattr.va_mode = mode & ALLPERMS;
error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
VOP_UNLOCK(vp, 0, td);
vn_finished_write(mp);
return error;
}
/*
* Change mode of a file given path name.
*/
#ifndef _SYS_SYSPROTO_H_
struct chmod_args {
char *path;
int mode;
};
#endif
/* ARGSUSED */
int
chmod(td, uap)
struct thread *td;
register struct chmod_args /* {
syscallarg(char *) path;
syscallarg(int) mode;
} */ *uap;
{
int error;
struct nameidata nd;
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
vrele(nd.ni_vp);
return error;
}
/*
* Change mode of a file given path name (don't follow links.)
*/
#ifndef _SYS_SYSPROTO_H_
struct lchmod_args {
char *path;
int mode;
};
#endif
/* ARGSUSED */
int
lchmod(td, uap)
struct thread *td;
register struct lchmod_args /* {
syscallarg(char *) path;
syscallarg(int) mode;
} */ *uap;
{
int error;
struct nameidata nd;
NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
vrele(nd.ni_vp);
return error;
}
/*
* Change mode of a file given a file descriptor.
*/
#ifndef _SYS_SYSPROTO_H_
struct fchmod_args {
int fd;
int mode;
};
#endif
/* ARGSUSED */
int
fchmod(td, uap)
struct thread *td;
register struct fchmod_args /* {
syscallarg(int) fd;
syscallarg(int) mode;
} */ *uap;
{
struct file *fp;
struct vnode *vp;
int error;
if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
vp = (struct vnode *)fp->f_data;
error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
fdrop(fp, td);
return (error);
}
/*
* Common implementation for chown(), lchown(), and fchown()
*/
static int
setfown(td, vp, uid, gid)
struct thread *td;
struct vnode *vp;
uid_t uid;
gid_t gid;
{
int error;
struct mount *mp;
struct vattr vattr;
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
VATTR_NULL(&vattr);
vattr.va_uid = uid;
vattr.va_gid = gid;
error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
VOP_UNLOCK(vp, 0, td);
vn_finished_write(mp);
return error;
}
/*
* Set ownership given a path name.
*/
#ifndef _SYS_SYSPROTO_H_
struct chown_args {
char *path;
int uid;
int gid;
};
#endif
/* ARGSUSED */
int
chown(td, uap)
struct thread *td;
register struct chown_args /* {
syscallarg(char *) path;
syscallarg(int) uid;
syscallarg(int) gid;
} */ *uap;
{
int error;
struct nameidata nd;
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
vrele(nd.ni_vp);
return (error);
}
/*
* Set ownership given a path name, do not cross symlinks.
*/
#ifndef _SYS_SYSPROTO_H_
struct lchown_args {
char *path;
int uid;
int gid;
};
#endif
/* ARGSUSED */
int
lchown(td, uap)
struct thread *td;
register struct lchown_args /* {
syscallarg(char *) path;
syscallarg(int) uid;
syscallarg(int) gid;
} */ *uap;
{
int error;
struct nameidata nd;
NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
vrele(nd.ni_vp);
return (error);
}
/*
* Set ownership given a file descriptor.
*/
#ifndef _SYS_SYSPROTO_H_
struct fchown_args {
int fd;
int uid;
int gid;
};
#endif
/* ARGSUSED */
int
fchown(td, uap)
struct thread *td;
register struct fchown_args /* {
syscallarg(int) fd;
syscallarg(int) uid;
syscallarg(int) gid;
} */ *uap;
{
struct file *fp;
struct vnode *vp;
int error;
if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
vp = (struct vnode *)fp->f_data;
error = setfown(td, (struct vnode *)fp->f_data,
SCARG(uap, uid), SCARG(uap, gid));
fdrop(fp, td);
return (error);
}
/*
* Common implementation code for utimes(), lutimes(), and futimes().
*/
static int
getutimes(usrtvp, tsp)
const struct timeval *usrtvp;
struct timespec *tsp;
{
struct timeval tv[2];
int error;
if (usrtvp == NULL) {
microtime(&tv[0]);
TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
tsp[1] = tsp[0];
} else {
if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
return (error);
TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
}
return 0;
}
/*
* Common implementation code for utimes(), lutimes(), and futimes().
*/
static int
setutimes(td, vp, ts, nullflag)
struct thread *td;
struct vnode *vp;
const struct timespec *ts;
int nullflag;
{
int error;
struct mount *mp;
struct vattr vattr;
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
VATTR_NULL(&vattr);
vattr.va_atime = ts[0];
vattr.va_mtime = ts[1];
if (nullflag)
vattr.va_vaflags |= VA_UTIMES_NULL;
error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
VOP_UNLOCK(vp, 0, td);
vn_finished_write(mp);
return error;
}
/*
* Set the access and modification times of a file.
*/
#ifndef _SYS_SYSPROTO_H_
struct utimes_args {
char *path;
struct timeval *tptr;
};
#endif
/* ARGSUSED */
int
utimes(td, uap)
struct thread *td;
register struct utimes_args /* {
syscallarg(char *) path;
syscallarg(struct timeval *) tptr;
} */ *uap;
{
struct timespec ts[2];
struct timeval *usrtvp;
int error;
struct nameidata nd;
usrtvp = SCARG(uap, tptr);
if ((error = getutimes(usrtvp, ts)) != 0)
return (error);
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
vrele(nd.ni_vp);
return (error);
}
/*
* Set the access and modification times of a file.
*/
#ifndef _SYS_SYSPROTO_H_
struct lutimes_args {
char *path;
struct timeval *tptr;
};
#endif
/* ARGSUSED */
int
lutimes(td, uap)
struct thread *td;
register struct lutimes_args /* {
syscallarg(char *) path;
syscallarg(struct timeval *) tptr;
} */ *uap;
{
struct timespec ts[2];
struct timeval *usrtvp;
int error;
struct nameidata nd;
usrtvp = SCARG(uap, tptr);
if ((error = getutimes(usrtvp, ts)) != 0)
return (error);
NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
vrele(nd.ni_vp);
return (error);
}
/*
* Set the access and modification times of a file.
*/
#ifndef _SYS_SYSPROTO_H_
struct futimes_args {
int fd;
struct timeval *tptr;
};
#endif
/* ARGSUSED */
int
futimes(td, uap)
struct thread *td;
register struct futimes_args /* {
syscallarg(int ) fd;
syscallarg(struct timeval *) tptr;
} */ *uap;
{
struct timespec ts[2];
struct file *fp;
struct timeval *usrtvp;
int error;
usrtvp = SCARG(uap, tptr);
if ((error = getutimes(usrtvp, ts)) != 0)
return (error);
if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
fdrop(fp, td);
return (error);
}
/*
* Truncate a file given its path name.
*/
#ifndef _SYS_SYSPROTO_H_
struct truncate_args {
char *path;
int pad;
off_t length;
};
#endif
/* ARGSUSED */
int
truncate(td, uap)
struct thread *td;
register struct truncate_args /* {
syscallarg(char *) path;
syscallarg(int) pad;
syscallarg(off_t) length;
} */ *uap;
{
struct mount *mp;
struct vnode *vp;
struct vattr vattr;
int error;
struct nameidata nd;
if (uap->length < 0)
return(EINVAL);
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
vrele(vp);
return (error);
}
NDFREE(&nd, NDF_ONLY_PNBUF);
VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
if (vp->v_type == VDIR)
error = EISDIR;
else if ((error = vn_writechk(vp)) == 0 &&
(error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
VATTR_NULL(&vattr);
vattr.va_size = SCARG(uap, length);
error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
}
vput(vp);
vn_finished_write(mp);
return (error);
}
/*
* Truncate a file given a file descriptor.
*/
#ifndef _SYS_SYSPROTO_H_
struct ftruncate_args {
int fd;
int pad;
off_t length;
};
#endif
/* ARGSUSED */
int
ftruncate(td, uap)
struct thread *td;
register struct ftruncate_args /* {
syscallarg(int) fd;
syscallarg(int) pad;
syscallarg(off_t) length;
} */ *uap;
{
struct mount *mp;
struct vattr vattr;
struct vnode *vp;
struct file *fp;
int error;
if (uap->length < 0)
return(EINVAL);
if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
if ((fp->f_flag & FWRITE) == 0) {
fdrop(fp, td);
return (EINVAL);
}
vp = (struct vnode *)fp->f_data;
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
fdrop(fp, td);
return (error);
}
VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
if (vp->v_type == VDIR)
error = EISDIR;
else if ((error = vn_writechk(vp)) == 0) {
VATTR_NULL(&vattr);
vattr.va_size = SCARG(uap, length);
error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
}
VOP_UNLOCK(vp, 0, td);
vn_finished_write(mp);
fdrop(fp, td);
return (error);
}
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
* Truncate a file given its path name.
*/
#ifndef _SYS_SYSPROTO_H_
struct otruncate_args {
char *path;
long length;
};
#endif
/* ARGSUSED */
int
otruncate(td, uap)
struct thread *td;
register struct otruncate_args /* {
syscallarg(char *) path;
syscallarg(long) length;
} */ *uap;
{
struct truncate_args /* {
syscallarg(char *) path;
syscallarg(int) pad;
syscallarg(off_t) length;
} */ nuap;
SCARG(&nuap, path) = SCARG(uap, path);
SCARG(&nuap, length) = SCARG(uap, length);
return (truncate(td, &nuap));
}
/*
* Truncate a file given a file descriptor.
*/
#ifndef _SYS_SYSPROTO_H_
struct oftruncate_args {
int fd;
long length;
};
#endif
/* ARGSUSED */
int
oftruncate(td, uap)
struct thread *td;
register struct oftruncate_args /* {
syscallarg(int) fd;
syscallarg(long) length;
} */ *uap;
{
struct ftruncate_args /* {
syscallarg(int) fd;
syscallarg(int) pad;
syscallarg(off_t) length;
} */ nuap;
SCARG(&nuap, fd) = SCARG(uap, fd);
SCARG(&nuap, length) = SCARG(uap, length);
return (ftruncate(td, &nuap));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
/*
* Sync an open file.
*/
#ifndef _SYS_SYSPROTO_H_
struct fsync_args {
int fd;
};
#endif
/* ARGSUSED */
int
fsync(td, uap)
struct thread *td;
struct fsync_args /* {
syscallarg(int) fd;
} */ *uap;
{
struct vnode *vp;
struct mount *mp;
struct file *fp;
vm_object_t obj;
int error;
GIANT_REQUIRED;
if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
vp = (struct vnode *)fp->f_data;
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
fdrop(fp, td);
return (error);
}
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
if (VOP_GETVOBJECT(vp, &obj) == 0) {
vm_object_page_clean(obj, 0, 0, 0);
}
error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
#ifdef SOFTUPDATES
if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
error = softdep_fsync(vp);
#endif
VOP_UNLOCK(vp, 0, td);
vn_finished_write(mp);
fdrop(fp, td);
return (error);
}
/*
* Rename files. Source and destination must either both be directories,
* or both not be directories. If target is a directory, it must be empty.
*/
#ifndef _SYS_SYSPROTO_H_
struct rename_args {
char *from;
char *to;
};
#endif
/* ARGSUSED */
int
rename(td, uap)
struct thread *td;
register struct rename_args /* {
syscallarg(char *) from;
syscallarg(char *) to;
} */ *uap;
{
struct mount *mp;
struct vnode *tvp, *fvp, *tdvp;
struct nameidata fromnd, tond;
int error;
bwillwrite();
NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
SCARG(uap, from), td);
if ((error = namei(&fromnd)) != 0)
return (error);
fvp = fromnd.ni_vp;
if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
NDFREE(&fromnd, NDF_ONLY_PNBUF);
vrele(fromnd.ni_dvp);
vrele(fvp);
goto out1;
}
NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
UIO_USERSPACE, SCARG(uap, to), td);
if (fromnd.ni_vp->v_type == VDIR)
tond.ni_cnd.cn_flags |= WILLBEDIR;
if ((error = namei(&tond)) != 0) {
/* Translate error code for rename("dir1", "dir2/."). */
if (error == EISDIR && fvp->v_type == VDIR)
error = EINVAL;
NDFREE(&fromnd, NDF_ONLY_PNBUF);
vrele(fromnd.ni_dvp);
vrele(fvp);
goto out1;
}
tdvp = tond.ni_dvp;
tvp = tond.ni_vp;
if (tvp != NULL) {
if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
error = ENOTDIR;
goto out;
} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
error = EISDIR;
goto out;
}
}
if (fvp == tdvp)
error = EINVAL;
/*
* If source is the same as the destination (that is the
* same inode number with the same name in the same directory),
* then there is nothing to do.
*/
if (fvp == tvp && fromnd.ni_dvp == tdvp &&
fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
!bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
fromnd.ni_cnd.cn_namelen))
error = -1;
out:
if (!error) {
VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
if (fromnd.ni_dvp != tdvp) {
VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
}
if (tvp) {
VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
}
error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
NDFREE(&fromnd, NDF_ONLY_PNBUF);
NDFREE(&tond, NDF_ONLY_PNBUF);
} else {
NDFREE(&fromnd, NDF_ONLY_PNBUF);
NDFREE(&tond, NDF_ONLY_PNBUF);
if (tdvp == tvp)
vrele(tdvp);
else
vput(tdvp);
if (tvp)
vput(tvp);
vrele(fromnd.ni_dvp);
vrele(fvp);
}
vrele(tond.ni_startdir);
vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
out1:
if (fromnd.ni_startdir)
vrele(fromnd.ni_startdir);
if (error == -1)
return (0);
return (error);
}
/*
* Make a directory file.
*/
#ifndef _SYS_SYSPROTO_H_
struct mkdir_args {
char *path;
int mode;
};
#endif
/* ARGSUSED */
int
mkdir(td, uap)
struct thread *td;
register struct mkdir_args /* {
syscallarg(char *) path;
syscallarg(int) mode;
} */ *uap;
{
return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
}
int
vn_mkdir(path, mode, segflg, td)
char *path;
int mode;
enum uio_seg segflg;
struct thread *td;
{
struct mount *mp;
struct vnode *vp;
struct vattr vattr;
int error;
struct nameidata nd;
restart:
bwillwrite();
NDINIT(&nd, CREATE, LOCKPARENT, segflg, path, td);
nd.ni_cnd.cn_flags |= WILLBEDIR;
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
if (vp != NULL) {
NDFREE(&nd, NDF_ONLY_PNBUF);
vrele(vp);
vput(nd.ni_dvp);
return (EEXIST);
}
if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
return (error);
goto restart;
}
VATTR_NULL(&vattr);
vattr.va_type = VDIR;
FILEDESC_LOCK(td->td_proc->p_fd);
vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
FILEDESC_UNLOCK(td->td_proc->p_fd);
VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
if (!error)
vput(nd.ni_vp);
vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
return (error);
}
/*
* Remove a directory file.
*/
#ifndef _SYS_SYSPROTO_H_
struct rmdir_args {
char *path;
};
#endif
/* ARGSUSED */
int
rmdir(td, uap)
struct thread *td;
struct rmdir_args /* {
syscallarg(char *) path;
} */ *uap;
{
struct mount *mp;
struct vnode *vp;
int error;
struct nameidata nd;
restart:
bwillwrite();
NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
if (vp->v_type != VDIR) {
error = ENOTDIR;
goto out;
}
/*
* No rmdir "." please.
*/
if (nd.ni_dvp == vp) {
error = EINVAL;
goto out;
}
/*
* The root of a mounted filesystem cannot be deleted.
*/
if (vp->v_flag & VROOT) {
error = EBUSY;
goto out;
}
if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_dvp == vp)
vrele(nd.ni_dvp);
else
vput(nd.ni_dvp);
vput(vp);
if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
return (error);
goto restart;
}
VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
vn_finished_write(mp);
out:
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_dvp == vp)
vrele(nd.ni_dvp);
else
vput(nd.ni_dvp);
vput(vp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
return (error);
}
#ifdef COMPAT_43
/*
* Read a block of directory entries in a file system independent format.
*/
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
int fd;
char *buf;
u_int count;
long *basep;
};
#endif
int
ogetdirentries(td, uap)
struct thread *td;
register struct ogetdirentries_args /* {
syscallarg(int) fd;
syscallarg(char *) buf;
syscallarg(u_int) count;
syscallarg(long *) basep;
} */ *uap;
{
struct vnode *vp;
struct file *fp;
struct uio auio, kuio;
struct iovec aiov, kiov;
struct dirent *dp, *edp;
caddr_t dirbuf;
int error, eofflag, readcnt;
long loff;
/* XXX arbitrary sanity limit on `count'. */
if (SCARG(uap, count) > 64 * 1024)
return (EINVAL);
if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
if ((fp->f_flag & FREAD) == 0) {
fdrop(fp, td);
return (EBADF);
}
vp = (struct vnode *)fp->f_data;
unionread:
if (vp->v_type != VDIR) {
fdrop(fp, td);
return (EINVAL);
}
aiov.iov_base = SCARG(uap, buf);
aiov.iov_len = SCARG(uap, count);
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_rw = UIO_READ;
auio.uio_segflg = UIO_USERSPACE;
auio.uio_td = td;
auio.uio_resid = SCARG(uap, count);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
loff = auio.uio_offset = fp->f_offset;
# if (BYTE_ORDER != LITTLE_ENDIAN)
if (vp->v_mount->mnt_maxsymlinklen <= 0) {
error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
NULL, NULL);
fp->f_offset = auio.uio_offset;
} else
# endif
{
kuio = auio;
kuio.uio_iov = &kiov;
kuio.uio_segflg = UIO_SYSSPACE;
kiov.iov_len = SCARG(uap, count);
MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
kiov.iov_base = dirbuf;
error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
NULL, NULL);
fp->f_offset = kuio.uio_offset;
if (error == 0) {
readcnt = SCARG(uap, count) - kuio.uio_resid;
edp = (struct dirent *)&dirbuf[readcnt];
for (dp = (struct dirent *)dirbuf; dp < edp; ) {
# if (BYTE_ORDER == LITTLE_ENDIAN)
/*
* The expected low byte of
* dp->d_namlen is our dp->d_type.
* The high MBZ byte of dp->d_namlen
* is our dp->d_namlen.
*/
dp->d_type = dp->d_namlen;
dp->d_namlen = 0;
# else
/*
* The dp->d_type is the high byte
* of the expected dp->d_namlen,
* so must be zero'ed.
*/
dp->d_type = 0;
# endif
if (dp->d_reclen > 0) {
dp = (struct dirent *)
((char *)dp + dp->d_reclen);
} else {
error = EIO;
break;
}
}
if (dp >= edp)
error = uiomove(dirbuf, readcnt, &auio);
}
FREE(dirbuf, M_TEMP);
}
VOP_UNLOCK(vp, 0, td);
if (error) {
fdrop(fp, td);
return (error);
}
if (SCARG(uap, count) == auio.uio_resid) {
if (union_dircheckp) {
error = union_dircheckp(td, &vp, fp);
if (error == -1)
goto unionread;
if (error) {
fdrop(fp, td);
return (error);
}
}
if ((vp->v_flag & VROOT) &&
(vp->v_mount->mnt_flag & MNT_UNION)) {
struct vnode *tvp = vp;
vp = vp->v_mount->mnt_vnodecovered;
VREF(vp);
fp->f_data = (caddr_t) vp;
fp->f_offset = 0;
vrele(tvp);
goto unionread;
}
}
error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
sizeof(long));
fdrop(fp, td);
td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
return (error);
}
#endif /* COMPAT_43 */
/*
* Read a block of directory entries in a file system independent format.
*/
#ifndef _SYS_SYSPROTO_H_
struct getdirentries_args {
int fd;
char *buf;
u_int count;
long *basep;
};
#endif
int
getdirentries(td, uap)
struct thread *td;
register struct getdirentries_args /* {
syscallarg(int) fd;
syscallarg(char *) buf;
syscallarg(u_int) count;
syscallarg(long *) basep;
} */ *uap;
{
struct vnode *vp;
struct file *fp;
struct uio auio;
struct iovec aiov;
long loff;
int error, eofflag;
if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
if ((fp->f_flag & FREAD) == 0) {
fdrop(fp, td);
return (EBADF);
}
vp = (struct vnode *)fp->f_data;
unionread:
if (vp->v_type != VDIR) {
fdrop(fp, td);
return (EINVAL);
}
aiov.iov_base = SCARG(uap, buf);
aiov.iov_len = SCARG(uap, count);
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_rw = UIO_READ;
auio.uio_segflg = UIO_USERSPACE;
auio.uio_td = td;
auio.uio_resid = SCARG(uap, count);
/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
loff = auio.uio_offset = fp->f_offset;
error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
fp->f_offset = auio.uio_offset;
VOP_UNLOCK(vp, 0, td);
if (error) {
fdrop(fp, td);
return (error);
}
if (SCARG(uap, count) == auio.uio_resid) {
if (union_dircheckp) {
error = union_dircheckp(td, &vp, fp);
if (error == -1)
goto unionread;
if (error) {
fdrop(fp, td);
return (error);
}
}
if ((vp->v_flag & VROOT) &&
(vp->v_mount->mnt_flag & MNT_UNION)) {
struct vnode *tvp = vp;
vp = vp->v_mount->mnt_vnodecovered;
VREF(vp);
fp->f_data = (caddr_t) vp;
fp->f_offset = 0;
vrele(tvp);
goto unionread;
}
}
if (SCARG(uap, basep) != NULL) {
error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
sizeof(long));
}
td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
fdrop(fp, td);
return (error);
}
#ifndef _SYS_SYSPROTO_H_
struct getdents_args {
int fd;
char *buf;
size_t count;
};
#endif
int
getdents(td, uap)
struct thread *td;
register struct getdents_args /* {
syscallarg(int) fd;
syscallarg(char *) buf;
syscallarg(u_int) count;
} */ *uap;
{
struct getdirentries_args ap;
ap.fd = uap->fd;
ap.buf = uap->buf;
ap.count = uap->count;
ap.basep = NULL;
return getdirentries(td, &ap);
}
/*
* Set the mode mask for creation of filesystem nodes.
*
* MP SAFE
*/
#ifndef _SYS_SYSPROTO_H_
struct umask_args {
int newmask;
};
#endif
int
umask(td, uap)
struct thread *td;
struct umask_args /* {
syscallarg(int) newmask;
} */ *uap;
{
register struct filedesc *fdp;
FILEDESC_LOCK(td->td_proc->p_fd);
fdp = td->td_proc->p_fd;
td->td_retval[0] = fdp->fd_cmask;
fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
FILEDESC_UNLOCK(td->td_proc->p_fd);
return (0);
}
/*
* Void all references to file by ripping underlying filesystem
* away from vnode.
*/
#ifndef _SYS_SYSPROTO_H_
struct revoke_args {
char *path;
};
#endif
/* ARGSUSED */
int
revoke(td, uap)
struct thread *td;
register struct revoke_args /* {
syscallarg(char *) path;
} */ *uap;
{
struct mount *mp;
struct vnode *vp;
struct vattr vattr;
int error;
struct nameidata nd;
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path),
td);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
NDFREE(&nd, NDF_ONLY_PNBUF);
if (vp->v_type != VCHR) {
vput(vp);
return (EINVAL);
}
error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
if (error) {
vput(vp);
return (error);
}
VOP_UNLOCK(vp, 0, td);
if (td->td_ucred->cr_uid != vattr.va_uid) {
error = suser_cred(td->td_ucred, PRISON_ROOT);
if (error)
goto out;
}
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
goto out;
if (vcount(vp) > 1)
VOP_REVOKE(vp, REVOKEALL);
vn_finished_write(mp);
out:
vrele(vp);
return (error);
}
/*
* Convert a user file descriptor to a kernel file entry.
* The file entry is locked upon returning.
*/
int
getvnode(fdp, fd, fpp)
struct filedesc *fdp;
int fd;
struct file **fpp;
{
int error;
struct file *fp;
fp = NULL;
if (fdp == NULL)
error = EBADF;
else {
FILEDESC_LOCK(fdp);
if ((u_int)fd >= fdp->fd_nfiles ||
(fp = fdp->fd_ofiles[fd]) == NULL)
error = EBADF;
else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
fp = NULL;
error = EINVAL;
} else {
fhold(fp);
error = 0;
}
FILEDESC_UNLOCK(fdp);
}
*fpp = fp;
return (error);
}
/*
* Get (NFS) file handle
*/
#ifndef _SYS_SYSPROTO_H_
struct getfh_args {
char *fname;
fhandle_t *fhp;
};
#endif
int
getfh(td, uap)
struct thread *td;
register struct getfh_args *uap;
{
struct nameidata nd;
fhandle_t fh;
register struct vnode *vp;
int error;
/*
* Must be super user
*/
error = suser(td);
if (error)
return (error);
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
error = namei(&nd);
if (error)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
bzero(&fh, sizeof(fh));
fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
error = VFS_VPTOFH(vp, &fh.fh_fid);
vput(vp);
if (error)
return (error);
error = copyout(&fh, uap->fhp, sizeof (fh));
return (error);
}
/*
* syscall for the rpc.lockd to use to translate a NFS file handle into
* an open descriptor.
*
* warning: do not remove the suser() call or this becomes one giant
* security hole.
*/
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
const struct fhandle *u_fhp;
int flags;
};
#endif
int
fhopen(td, uap)
struct thread *td;
struct fhopen_args /* {
syscallarg(const struct fhandle *) u_fhp;
syscallarg(int) flags;
} */ *uap;
{
struct proc *p = td->td_proc;
struct mount *mp;
struct vnode *vp;
struct fhandle fhp;
struct vattr vat;
struct vattr *vap = &vat;
struct flock lf;
struct file *fp;
register struct filedesc *fdp = p->p_fd;
int fmode, mode, error, type;
struct file *nfp;
int indx;
/*
* Must be super user
*/
error = suser(td);
if (error)
return (error);
fmode = FFLAGS(SCARG(uap, flags));
/* why not allow a non-read/write open for our lockd? */
if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
return (EINVAL);
error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
if (error)
return(error);
/* find the mount point */
mp = vfs_getvfs(&fhp.fh_fsid);
if (mp == NULL)
return (ESTALE);
/* now give me my vnode, it gets returned to me locked */
error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
if (error)
return (error);
/*
* from now on we have to make sure not
* to forget about the vnode
* any error that causes an abort must vput(vp)
* just set error = err and 'goto bad;'.
*/
/*
* from vn_open
*/
if (vp->v_type == VLNK) {
error = EMLINK;
goto bad;
}
if (vp->v_type == VSOCK) {
error = EOPNOTSUPP;
goto bad;
}
mode = 0;
if (fmode & (FWRITE | O_TRUNC)) {
if (vp->v_type == VDIR) {
error = EISDIR;
goto bad;
}
error = vn_writechk(vp);
if (error)
goto bad;
mode |= VWRITE;
}
if (fmode & FREAD)
mode |= VREAD;
if (mode) {
error = VOP_ACCESS(vp, mode, td->td_ucred, td);
if (error)
goto bad;
}
if (fmode & O_TRUNC) {
VOP_UNLOCK(vp, 0, td); /* XXX */
if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
vrele(vp);
return (error);
}
VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* XXX */
VATTR_NULL(vap);
vap->va_size = 0;
error = VOP_SETATTR(vp, vap, td->td_ucred, td);
vn_finished_write(mp);
if (error)
goto bad;
}
error = VOP_OPEN(vp, fmode, td->td_ucred, td);
if (error)
goto bad;
/*
* Make sure that a VM object is created for VMIO support.
*/
if (vn_canvmio(vp) == TRUE) {
if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
goto bad;
}
if (fmode & FWRITE)
vp->v_writecount++;
/*
* end of vn_open code
*/
if ((error = falloc(td, &nfp, &indx)) != 0) {
if (fmode & FWRITE)
vp->v_writecount--;
goto bad;
}
fp = nfp;
/*
* Hold an extra reference to avoid having fp ripped out
* from under us while we block in the lock op
*/
fhold(fp);
nfp->f_data = (caddr_t)vp;
nfp->f_flag = fmode & FMASK;
nfp->f_ops = &vnops;
nfp->f_type = DTYPE_VNODE;
if (fmode & (O_EXLOCK | O_SHLOCK)) {
lf.l_whence = SEEK_SET;
lf.l_start = 0;
lf.l_len = 0;
if (fmode & O_EXLOCK)
lf.l_type = F_WRLCK;
else
lf.l_type = F_RDLCK;
type = F_FLOCK;
if ((fmode & FNONBLOCK) == 0)
type |= F_WAIT;
VOP_UNLOCK(vp, 0, td);
if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
/*
* The lock request failed. Normally close the
* descriptor but handle the case where someone might
* have dup()d or close()d it when we weren't looking.
*/
FILEDESC_LOCK(fdp);
if (fdp->fd_ofiles[indx] == fp) {
fdp->fd_ofiles[indx] = NULL;
FILEDESC_UNLOCK(fdp);
fdrop(fp, td);
} else
FILEDESC_UNLOCK(fdp);
/*
* release our private reference
*/
fdrop(fp, td);
return(error);
}
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
fp->f_flag |= FHASLOCK;
}
if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
vfs_object_create(vp, td, td->td_ucred);
VOP_UNLOCK(vp, 0, td);
fdrop(fp, td);
td->td_retval[0] = indx;
return (0);
bad:
vput(vp);
return (error);
}
/*
* Stat an (NFS) file handle.
*/
#ifndef _SYS_SYSPROTO_H_
struct fhstat_args {
struct fhandle *u_fhp;
struct stat *sb;
};
#endif
int
fhstat(td, uap)
struct thread *td;
register struct fhstat_args /* {
syscallarg(struct fhandle *) u_fhp;
syscallarg(struct stat *) sb;
} */ *uap;
{
struct stat sb;
fhandle_t fh;
struct mount *mp;
struct vnode *vp;
int error;
/*
* Must be super user
*/
error = suser(td);
if (error)
return (error);
error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
if (error)
return (error);
if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
return (ESTALE);
if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
return (error);
error = vn_stat(vp, &sb, td);
vput(vp);
if (error)
return (error);
error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
return (error);
}
/*
* Implement fstatfs() for (NFS) file handles.
*/
#ifndef _SYS_SYSPROTO_H_
struct fhstatfs_args {
struct fhandle *u_fhp;
struct statfs *buf;
};
#endif
int
fhstatfs(td, uap)
struct thread *td;
struct fhstatfs_args /* {
syscallarg(struct fhandle) *u_fhp;
syscallarg(struct statfs) *buf;
} */ *uap;
{
struct statfs *sp;
struct mount *mp;
struct vnode *vp;
struct statfs sb;
fhandle_t fh;
int error;
/*
* Must be super user
*/
error = suser(td);
if (error)
return (error);
if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
return (error);
if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
return (ESTALE);
if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
return (error);
mp = vp->v_mount;
sp = &mp->mnt_stat;
vput(vp);
if ((error = VFS_STATFS(mp, sp, td)) != 0)
return (error);
sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
if (suser(td)) {
bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
sp = &sb;
}
return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
}
/*
* Syscall to push extended attribute configuration information into the
* VFS. Accepts a path, which it converts to a mountpoint, as well as
* a command (int cmd), and attribute name and misc data. For now, the
* attribute name is left in userspace for consumption by the VFS_op.
* It will probably be changed to be copied into sysspace by the
* syscall in the future, once issues with various consumers of the
* attribute code have raised their hands.
*
* Currently this is used only by UFS Extended Attributes.
*/
int
extattrctl(td, uap)
struct thread *td;
struct extattrctl_args *uap;
{
struct vnode *filename_vp;
struct nameidata nd;
struct mount *mp, *mp_writable;
char attrname[EXTATTR_MAXNAMELEN];
int error;
/*
* SCARG(uap, attrname) not always defined. We check again later
* when we invoke the VFS call so as to pass in NULL there if needed.
*/
if (SCARG(uap, attrname) != NULL) {
error = copyinstr(SCARG(uap, attrname), attrname,
EXTATTR_MAXNAMELEN, NULL);
if (error)
return (error);
}
/*
* SCARG(uap, filename) not always defined. If it is, grab
* a vnode lock, which VFS_EXTATTRCTL() will later release.
*/
filename_vp = NULL;
if (SCARG(uap, filename) != NULL) {
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
SCARG(uap, filename), td);
if ((error = namei(&nd)) != 0)
return (error);
filename_vp = nd.ni_vp;
NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
}
/* SCARG(uap, path) always defined. */
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0) {
if (filename_vp != NULL)
vput(filename_vp);
return (error);
}
mp = nd.ni_vp->v_mount;
error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
NDFREE(&nd, 0);
if (error) {
if (filename_vp != NULL)
vput(filename_vp);
return (error);
}
if (SCARG(uap, attrname) != NULL) {
error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
SCARG(uap, attrnamespace), attrname, td);
} else {
error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
SCARG(uap, attrnamespace), NULL, td);
}
vn_finished_write(mp_writable);
/*
* VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
* filename_vp, so vrele it if it is defined.
*/
if (filename_vp != NULL)
vrele(filename_vp);
return (error);
}
/*-
* Set a named extended attribute on a file or directory
*
* Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
* kernelspace string pointer "attrname", userspace buffer
* pointer "data", buffer length "nbytes", thread "td".
* Returns: 0 on success, an error number otherwise
* Locks: none
* References: vp must be a valid reference for the duration of the call
*/
static int
extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
void *data, size_t nbytes, struct thread *td)
{
struct mount *mp;
struct uio auio;
struct iovec aiov;
ssize_t cnt;
int error;
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
aiov.iov_base = data;
aiov.iov_len = nbytes;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
auio.uio_offset = 0;
if (nbytes > INT_MAX) {
error = EINVAL;
goto done;
}
auio.uio_resid = nbytes;
auio.uio_rw = UIO_WRITE;
auio.uio_segflg = UIO_USERSPACE;
auio.uio_td = td;
cnt = nbytes;
error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
td->td_ucred, td);
cnt -= auio.uio_resid;
td->td_retval[0] = cnt;
done:
VOP_UNLOCK(vp, 0, td);
vn_finished_write(mp);
return (error);
}
int
extattr_set_file(td, uap)
struct thread *td;
struct extattr_set_file_args *uap;
{
struct nameidata nd;
char attrname[EXTATTR_MAXNAMELEN];
int error;
error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
NULL);
if (error)
return (error);
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = extattr_set_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
SCARG(uap, data), SCARG(uap, nbytes), td);
vrele(nd.ni_vp);
return (error);
}
int
extattr_set_fd(td, uap)
struct thread *td;
struct extattr_set_fd_args *uap;
{
struct file *fp;
char attrname[EXTATTR_MAXNAMELEN];
int error;
error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
NULL);
if (error)
return (error);
if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
error = extattr_set_vp((struct vnode *)fp->f_data,
SCARG(uap, attrnamespace), attrname, SCARG(uap, data),
SCARG(uap, nbytes), td);
fdrop(fp, td);
return (error);
}
/*-
* Get a named extended attribute on a file or directory
*
* Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
* kernelspace string pointer "attrname", userspace buffer
* pointer "data", buffer length "nbytes", thread "td".
* Returns: 0 on success, an error number otherwise
* Locks: none
* References: vp must be a valid reference for the duration of the call
*/
static int
extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
void *data, size_t nbytes, struct thread *td)
{
struct uio auio;
struct iovec aiov;
ssize_t cnt;
size_t size;
int error;
VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
/*
* Slightly unusual semantics: if the user provides a NULL data
* pointer, they don't want to receive the data, just the
* maximum read length.
*/
if (data != NULL) {
aiov.iov_base = data;
aiov.iov_len = nbytes;
auio.uio_iov = &aiov;
auio.uio_offset = 0;
if (nbytes > INT_MAX) {
error = EINVAL;
goto done;
}
auio.uio_resid = nbytes;
auio.uio_rw = UIO_READ;
auio.uio_segflg = UIO_USERSPACE;
auio.uio_td = td;
cnt = nbytes;
error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio,
NULL, td->td_ucred, td);
cnt -= auio.uio_resid;
td->td_retval[0] = cnt;
} else {
error = VOP_GETEXTATTR(vp, attrnamespace, attrname, NULL,
&size, td->td_ucred, td);
td->td_retval[0] = size;
}
done:
VOP_UNLOCK(vp, 0, td);
return (error);
}
int
extattr_get_file(td, uap)
struct thread *td;
struct extattr_get_file_args *uap;
{
struct nameidata nd;
char attrname[EXTATTR_MAXNAMELEN];
int error;
error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
NULL);
if (error)
return (error);
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = extattr_get_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
SCARG(uap, data), SCARG(uap, nbytes), td);
vrele(nd.ni_vp);
return (error);
}
int
extattr_get_fd(td, uap)
struct thread *td;
struct extattr_get_fd_args *uap;
{
struct file *fp;
char attrname[EXTATTR_MAXNAMELEN];
int error;
error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
NULL);
if (error)
return (error);
if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
error = extattr_get_vp((struct vnode *)fp->f_data,
SCARG(uap, attrnamespace), attrname, SCARG(uap, data),
SCARG(uap, nbytes), td);
fdrop(fp, td);
return (error);
}
/*
* extattr_delete_vp(): Delete a named extended attribute on a file or
* directory
*
* Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
* kernelspace string pointer "attrname", proc "p"
* Returns: 0 on success, an error number otherwise
* Locks: none
* References: vp must be a valid reference for the duration of the call
*/
static int
extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
struct thread *td)
{
struct mount *mp;
int error;
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
td);
VOP_UNLOCK(vp, 0, td);
vn_finished_write(mp);
return (error);
}
int
extattr_delete_file(td, uap)
struct thread *td;
struct extattr_delete_file_args *uap;
{
struct nameidata nd;
char attrname[EXTATTR_MAXNAMELEN];
int error;
error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
NULL);
if (error)
return(error);
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
if ((error = namei(&nd)) != 0)
return(error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = extattr_delete_vp(nd.ni_vp, SCARG(uap, attrnamespace),
attrname, td);
vrele(nd.ni_vp);
return(error);
}
int
extattr_delete_fd(td, uap)
struct thread *td;
struct extattr_delete_fd_args *uap;
{
struct file *fp;
struct vnode *vp;
char attrname[EXTATTR_MAXNAMELEN];
int error;
error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
NULL);
if (error)
return (error);
if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
vp = (struct vnode *)fp->f_data;
error = extattr_delete_vp((struct vnode *)fp->f_data,
SCARG(uap, attrnamespace), attrname, td);
fdrop(fp, td);
return (error);
}