/* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 * $FreeBSD$ */ /* For 4.3 integer FS ID compatibility */ #include "opt_compat.h" #include "opt_ffs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int change_dir(struct nameidata *ndp, struct thread *td); static void checkdirs(struct vnode *olddp, struct vnode *newdp); static int chroot_refuse_vdir_fds(struct filedesc *fdp); static int getutimes(const struct timeval *, struct timespec *); static int setfown(struct thread *td, struct vnode *, uid_t, gid_t); static int setfmode(struct thread *td, struct vnode *, int); static int setfflags(struct thread *td, struct vnode *, int); static int setutimes(struct thread *td, struct vnode *, const struct timespec *, int); static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, struct thread *td); static void vfs_freeopts(struct vfsoptlist *opt); static int vfs_nmount(struct thread *td, int, struct uio *); static int usermount = 0; /* if 1, non-root can mount fs. */ int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *); SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); /* * Virtual File System System Calls */ #ifndef _SYS_SYSPROTO_H_ struct nmount_args { struct iovec *iovp; unsigned int iovcnt; int flags; }; #endif /* ARGSUSED */ int nmount(td, uap) struct thread *td; struct nmount_args /* { syscallarg(struct iovec *) iovp; syscallarg(unsigned int) iovcnt; syscallarg(int) flags; } */ *uap; { struct uio auio; struct iovec *iov, *needfree; struct iovec aiov[UIO_SMALLIOV]; long error, i; u_int iovlen, iovcnt; iovcnt = SCARG(uap, iovcnt); iovlen = iovcnt * sizeof (struct iovec); /* * Check that we have an even number of iovec's * and that we have at least two options. */ if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV)) return (EINVAL); if (iovcnt > UIO_SMALLIOV) { MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); needfree = iov; } else { iov = aiov; needfree = NULL; } auio.uio_iov = iov; auio.uio_iovcnt = iovcnt; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auio.uio_offset = 0; auio.uio_resid = 0; if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) goto finish; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - auio.uio_resid) { error = EINVAL; goto finish; } auio.uio_resid += iov->iov_len; iov++; } error = vfs_nmount(td, SCARG(uap, flags), &auio); finish: if (needfree != NULL) free(needfree, M_TEMP); return (error); } /* * Release all resources related to the * mount options. */ static void vfs_freeopts(struct vfsoptlist *opt) { free(opt->opt, M_MOUNT); free(opt->optbuf, M_MOUNT); free(opt, M_MOUNT); } int kernel_mount(iovp, iovcnt, flags) struct iovec *iovp; unsigned int iovcnt; int flags; { struct uio auio; struct iovec *iov; int error, i; /* * Check that we have an even number of iovec's * and that we have at least two options. */ if ((iovcnt & 1) || (iovcnt < 4)) return (EINVAL); auio.uio_iov = iovp; auio.uio_iovcnt = iovcnt; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_SYSSPACE; auio.uio_offset = 0; auio.uio_td = NULL; auio.uio_resid = 0; iov = iovp; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - auio.uio_resid) { return (EINVAL); } auio.uio_resid += iov->iov_len; iov++; } error = vfs_nmount(curthread, flags, &auio); return (error); } int kernel_vmount(int flags, ...) { struct iovec *iovp; struct uio auio; va_list ap; unsigned int iovcnt, iovlen, len; const char *cp; char *buf, *pos; size_t n; int error, i; len = 0; va_start(ap, flags); for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++) len += strlen(cp) + 1; va_end(ap); if (iovcnt < 4 || iovcnt & 1) return (EINVAL); iovlen = iovcnt * sizeof (struct iovec); MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK); MALLOC(buf, char *, len, M_MOUNT, M_WAITOK); pos = buf; va_start(ap, flags); for (i = 0; i < iovcnt; i++) { cp = va_arg(ap, const char *); copystr(cp, pos, len - (pos - buf), &n); iovp[i].iov_base = pos; iovp[i].iov_len = n; pos += n; } va_end(ap); auio.uio_iov = iovp; auio.uio_iovcnt = iovcnt; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_SYSSPACE; auio.uio_offset = 0; auio.uio_td = NULL; auio.uio_resid = len; error = vfs_nmount(curthread, flags, &auio); FREE(iovp, M_MOUNT); FREE(buf, M_MOUNT); return (error); } /* * vfs_nmount(): actually attempt a filesystem mount. */ static int vfs_nmount(td, fsflags, fsoptions) struct thread *td; int fsflags; /* Flags common to all filesystems. */ struct uio *fsoptions; /* Options local to the filesystem. */ { linker_file_t lf; struct vnode *vp; struct mount *mp; struct vfsconf *vfsp; struct iovec *cur; struct vfsoptlist *optlist; struct vfsopt *opt; char *buf, *fstype, *fspath; int error, flag = 0, kern_flag = 0, i, len, optcnt; int offset, iovcnt, fstypelen, fspathlen; struct vattr va; struct nameidata nd; /* * Allocate memory to hold the vfsopt structures. */ iovcnt = fsoptions->uio_iovcnt; optcnt = iovcnt >> 1; opt = malloc(sizeof (struct vfsopt) * optcnt, M_MOUNT, M_WAITOK | M_ZERO); /* * Count the size of the buffer for options, * allocate it, and fill in the vfsopt structures. */ cur = fsoptions->uio_iov; len = fsoptions->uio_resid; buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO); optlist = malloc(sizeof (struct vfsoptlist), M_MOUNT, M_WAITOK); optlist->opt = opt; optlist->optbuf = buf; optlist->optcnt = optcnt; offset = i = 0; cur = fsoptions->uio_iov; while (i < optcnt) { opt[i].name = buf + offset; /* Ensure the name of an option is a string. */ if (opt[i].name[cur->iov_len - 1] != '\0') { error = EINVAL; goto bad; } offset += cur->iov_len; cur++; opt[i].len = cur->iov_len; /* * Prevent consumers from trying to * read the value of a 0 length option * by setting it to NULL. */ if (opt[i].len == 0) opt[i].value = NULL; else opt[i].value = buf + offset; offset += cur->iov_len; cur++; i++; } if ((error = uiomove(buf, len, fsoptions)) != 0) goto bad; /* * We need these two options before the others, * and they are mandatory for any filesystem. * Ensure they are NULL terminated as well. */ fstypelen = 0; error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen); if (error || fstype[fstypelen - 1] != '\0') { error = EINVAL; goto bad; } fspathlen = 0; error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen); if (error || fspath[fspathlen - 1] != '\0') { error = EINVAL; goto bad; } /* * Be ultra-paranoid about making sure the type and fspath * variables will fit in our mp buffers, including the * terminating NUL. */ if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) { error = ENAMETOOLONG; goto bad; } if (usermount == 0) { error = suser(td); if (error) goto bad; } /* * Do not allow NFS export by non-root users. */ if (fsflags & MNT_EXPORTED) { error = suser(td); if (error) goto bad; } /* * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (suser(td)) fsflags |= MNT_NOSUID | MNT_NODEV; /* * Get vnode to be covered */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td); if ((error = namei(&nd)) != 0) goto bad; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (fsflags & MNT_UPDATE) { if ((vp->v_flag & VROOT) == 0) { vput(vp); error = EINVAL; goto bad; } mp = vp->v_mount; flag = mp->mnt_flag; kern_flag = mp->mnt_kern_flag; /* * We only allow the filesystem to be reloaded if it * is currently mounted read-only. */ if ((fsflags & MNT_RELOAD) && ((mp->mnt_flag & MNT_RDONLY) == 0)) { vput(vp); error = EOPNOTSUPP; /* Needs translation */ goto bad; } /* * Only root, or the user that did the original mount is * permitted to update it. */ if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) { error = suser(td); if (error) { vput(vp); goto bad; } } if (vfs_busy(mp, LK_NOWAIT, 0, td)) { vput(vp); error = EBUSY; goto bad; } mtx_lock(&vp->v_interlock); if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) { mtx_unlock(&vp->v_interlock); vfs_unbusy(mp, td); vput(vp); error = EBUSY; goto bad; } vp->v_flag |= VMOUNT; mtx_unlock(&vp->v_interlock); mp->mnt_flag |= fsflags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT); VOP_UNLOCK(vp, 0, td); mp->mnt_optnew = optlist; goto update; } /* * If the user is not root, ensure that they own the directory * onto which we are attempting to mount. */ error = VOP_GETATTR(vp, &va, td->td_ucred, td); if (error) { vput(vp); goto bad; } if (va.va_uid != td->td_ucred->cr_uid) { error = suser(td); if (error) { vput(vp); goto bad; } } if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) { vput(vp); goto bad; } if (vp->v_type != VDIR) { vput(vp); error = ENOTDIR; goto bad; } for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstype)) break; if (vfsp == NULL) { /* Only load modules for root (very important!). */ error = suser(td); if (error) { vput(vp); goto bad; } error = securelevel_gt(td->td_ucred, 0); if (error) { vput(vp); goto bad; } error = linker_load_file(fstype, &lf); if (error || lf == NULL) { vput(vp); if (lf == NULL) error = ENODEV; goto bad; } lf->userrefs++; /* Look up again to see if the VFS was loaded. */ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstype)) break; if (vfsp == NULL) { lf->userrefs--; linker_file_unload(lf); vput(vp); error = ENODEV; goto bad; } } mtx_lock(&vp->v_interlock); if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) { mtx_unlock(&vp->v_interlock); vput(vp); error = EBUSY; goto bad; } vp->v_flag |= VMOUNT; mtx_unlock(&vp->v_interlock); /* * Allocate and initialize the filesystem. */ mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO); TAILQ_INIT(&mp->mnt_nvnodelist); TAILQ_INIT(&mp->mnt_reservedvnlist); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, td); mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_vfc = vfsp; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN); mp->mnt_vnodecovered = vp; mp->mnt_stat.f_owner = td->td_ucred->cr_uid; strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); mp->mnt_iosize_max = DFLTPHYS; VOP_UNLOCK(vp, 0, td); mp->mnt_opt = optlist; update: /* * Check if the fs implements the new VFS_NMOUNT() * function, since the new system call was used. */ if (mp->mnt_op->vfs_mount != NULL) { printf("%s doesn't support the new mount syscall\n", mp->mnt_vfc->vfc_name); mtx_lock(&vp->v_interlock); vp->v_flag &= ~VMOUNT; mtx_unlock(&vp->v_interlock); if (mp->mnt_flag & MNT_UPDATE) vfs_unbusy(mp, td); else { mp->mnt_vfc->vfc_refcount--; vfs_unbusy(mp, td); free((caddr_t)mp, M_MOUNT); } vput(vp); error = EOPNOTSUPP; goto bad; } /* * Set the mount level flags. */ if (fsflags & MNT_RDONLY) mp->mnt_flag |= MNT_RDONLY; else if (mp->mnt_flag & MNT_RDONLY) mp->mnt_kern_flag |= MNTK_WANTRDWR; mp->mnt_flag &=~ MNT_UPDATEMASK; mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE); /* * Mount the filesystem. * XXX The final recipients of VFS_MOUNT just overwrite the ndp they * get. No freeing of cn_pnbuf. */ error = VFS_NMOUNT(mp, &nd, td); if (mp->mnt_flag & MNT_UPDATE) { if (mp->mnt_kern_flag & MNTK_WANTRDWR) mp->mnt_flag &= ~MNT_RDONLY; mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT); mp->mnt_kern_flag &=~ MNTK_WANTRDWR; if (error) { mp->mnt_flag = flag; mp->mnt_kern_flag = kern_flag; vfs_freeopts(mp->mnt_optnew); } else { vfs_freeopts(mp->mnt_opt); mp->mnt_opt = mp->mnt_optnew; } if ((mp->mnt_flag & MNT_RDONLY) == 0) { if (mp->mnt_syncer == NULL) error = vfs_allocate_syncvnode(mp); } else { if (mp->mnt_syncer != NULL) vrele(mp->mnt_syncer); mp->mnt_syncer = NULL; } vfs_unbusy(mp, td); mtx_lock(&vp->v_interlock); vp->v_flag &= ~VMOUNT; mtx_unlock(&vp->v_interlock); vrele(vp); return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* * Put the new filesystem on the mount list after root. */ cache_purge(vp); if (!error) { struct vnode *newdp; mtx_lock(&vp->v_interlock); vp->v_flag &= ~VMOUNT; vp->v_mountedhere = mp; mtx_unlock(&vp->v_interlock); mtx_lock(&mountlist_mtx); TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); if (VFS_ROOT(mp, &newdp)) panic("mount: lost mount"); checkdirs(vp, newdp); vput(newdp); VOP_UNLOCK(vp, 0, td); if ((mp->mnt_flag & MNT_RDONLY) == 0) error = vfs_allocate_syncvnode(mp); vfs_unbusy(mp, td); if ((error = VFS_START(mp, 0, td)) != 0) { vrele(vp); goto bad; } } else { mtx_lock(&vp->v_interlock); vp->v_flag &= ~VMOUNT; mtx_unlock(&vp->v_interlock); mp->mnt_vfc->vfc_refcount--; vfs_unbusy(mp, td); free((caddr_t)mp, M_MOUNT); vput(vp); goto bad; } return (0); bad: vfs_freeopts(optlist); return (error); } /* * Old Mount API. */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { char *type; char *path; int flags; caddr_t data; }; #endif /* ARGSUSED */ int mount(td, uap) struct thread *td; struct mount_args /* { syscallarg(char *) type; syscallarg(char *) path; syscallarg(int) flags; syscallarg(caddr_t) data; } */ *uap; { char *fstype; char *fspath; int error; fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK); fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK); /* * vfs_mount() actually takes a kernel string for `type' and * `path' now, so extract them. */ error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL); if (error) goto finish; error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL); if (error) goto finish; error = vfs_mount(td, fstype, fspath, SCARG(uap, flags), SCARG(uap, data)); finish: free(fstype, M_TEMP); free(fspath, M_TEMP); return (error); } /* * vfs_mount(): actually attempt a filesystem mount. * * This routine is designed to be a "generic" entry point for routines * that wish to mount a filesystem. All parameters except `fsdata' are * pointers into kernel space. `fsdata' is currently still a pointer * into userspace. */ int vfs_mount(td, fstype, fspath, fsflags, fsdata) struct thread *td; const char *fstype; char *fspath; int fsflags; void *fsdata; { linker_file_t lf; struct vnode *vp; struct mount *mp; struct vfsconf *vfsp; int error, flag = 0, kern_flag = 0; struct vattr va; struct nameidata nd; /* * Be ultra-paranoid about making sure the type and fspath * variables will fit in our mp buffers, including the * terminating NUL. */ if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) return (ENAMETOOLONG); if (usermount == 0) { error = suser(td); if (error) return (error); } /* * Do not allow NFS export by non-root users. */ if (fsflags & MNT_EXPORTED) { error = suser(td); if (error) return (error); } /* * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (suser(td)) fsflags |= MNT_NOSUID | MNT_NODEV; /* * Get vnode to be covered */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (fsflags & MNT_UPDATE) { if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } mp = vp->v_mount; flag = mp->mnt_flag; kern_flag = mp->mnt_kern_flag; /* * We only allow the filesystem to be reloaded if it * is currently mounted read-only. */ if ((fsflags & MNT_RELOAD) && ((mp->mnt_flag & MNT_RDONLY) == 0)) { vput(vp); return (EOPNOTSUPP); /* Needs translation */ } /* * Only root, or the user that did the original mount is * permitted to update it. */ if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) { error = suser(td); if (error) { vput(vp); return (error); } } if (vfs_busy(mp, LK_NOWAIT, 0, td)) { vput(vp); return (EBUSY); } mtx_lock(&vp->v_interlock); if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) { mtx_unlock(&vp->v_interlock); vfs_unbusy(mp, td); vput(vp); return (EBUSY); } vp->v_flag |= VMOUNT; mtx_unlock(&vp->v_interlock); mp->mnt_flag |= fsflags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT); VOP_UNLOCK(vp, 0, td); goto update; } /* * If the user is not root, ensure that they own the directory * onto which we are attempting to mount. */ error = VOP_GETATTR(vp, &va, td->td_ucred, td); if (error) { vput(vp); return (error); } if (va.va_uid != td->td_ucred->cr_uid) { error = suser(td); if (error) { vput(vp); return (error); } } if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) { vput(vp); return (error); } if (vp->v_type != VDIR) { vput(vp); return (ENOTDIR); } for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstype)) break; if (vfsp == NULL) { /* Only load modules for root (very important!). */ error = suser(td); if (error) { vput(vp); return (error); } error = securelevel_gt(td->td_ucred, 0); if (error) { vput(vp); return (error); } error = linker_load_file(fstype, &lf); if (error || lf == NULL) { vput(vp); if (lf == NULL) error = ENODEV; return (error); } lf->userrefs++; /* Look up again to see if the VFS was loaded. */ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstype)) break; if (vfsp == NULL) { lf->userrefs--; linker_file_unload(lf); vput(vp); return (ENODEV); } } mtx_lock(&vp->v_interlock); if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) { mtx_unlock(&vp->v_interlock); vput(vp); return (EBUSY); } vp->v_flag |= VMOUNT; mtx_unlock(&vp->v_interlock); /* * Allocate and initialize the filesystem. */ mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO); TAILQ_INIT(&mp->mnt_nvnodelist); TAILQ_INIT(&mp->mnt_reservedvnlist); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, td); mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_vfc = vfsp; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN); mp->mnt_vnodecovered = vp; mp->mnt_stat.f_owner = td->td_ucred->cr_uid; strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); mp->mnt_iosize_max = DFLTPHYS; VOP_UNLOCK(vp, 0, td); update: /* * Check if the fs implements the old VFS_MOUNT() * function, since the old system call was used. */ if (mp->mnt_op->vfs_mount == NULL) { printf("%s doesn't support the old mount syscall\n", mp->mnt_vfc->vfc_name); mtx_lock(&vp->v_interlock); vp->v_flag &= ~VMOUNT; mtx_unlock(&vp->v_interlock); if (mp->mnt_flag & MNT_UPDATE) vfs_unbusy(mp, td); else { mp->mnt_vfc->vfc_refcount--; vfs_unbusy(mp, td); free((caddr_t)mp, M_MOUNT); } vput(vp); return (EOPNOTSUPP); } /* * Set the mount level flags. */ if (fsflags & MNT_RDONLY) mp->mnt_flag |= MNT_RDONLY; else if (mp->mnt_flag & MNT_RDONLY) mp->mnt_kern_flag |= MNTK_WANTRDWR; mp->mnt_flag &=~ MNT_UPDATEMASK; mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE); /* * Mount the filesystem. * XXX The final recipients of VFS_MOUNT just overwrite the ndp they * get. No freeing of cn_pnbuf. */ error = VFS_MOUNT(mp, fspath, fsdata, &nd, td); if (mp->mnt_flag & MNT_UPDATE) { if (mp->mnt_kern_flag & MNTK_WANTRDWR) mp->mnt_flag &= ~MNT_RDONLY; mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT); mp->mnt_kern_flag &=~ MNTK_WANTRDWR; if (error) { mp->mnt_flag = flag; mp->mnt_kern_flag = kern_flag; } if ((mp->mnt_flag & MNT_RDONLY) == 0) { if (mp->mnt_syncer == NULL) error = vfs_allocate_syncvnode(mp); } else { if (mp->mnt_syncer != NULL) vrele(mp->mnt_syncer); mp->mnt_syncer = NULL; } vfs_unbusy(mp, td); mtx_lock(&vp->v_interlock); vp->v_flag &= ~VMOUNT; mtx_unlock(&vp->v_interlock); vrele(vp); return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* * Put the new filesystem on the mount list after root. */ cache_purge(vp); if (!error) { struct vnode *newdp; mtx_lock(&vp->v_interlock); vp->v_flag &= ~VMOUNT; vp->v_mountedhere = mp; mtx_unlock(&vp->v_interlock); mtx_lock(&mountlist_mtx); TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); if (VFS_ROOT(mp, &newdp)) panic("mount: lost mount"); checkdirs(vp, newdp); vput(newdp); VOP_UNLOCK(vp, 0, td); if ((mp->mnt_flag & MNT_RDONLY) == 0) error = vfs_allocate_syncvnode(mp); vfs_unbusy(mp, td); if ((error = VFS_START(mp, 0, td)) != 0) vrele(vp); } else { mtx_lock(&vp->v_interlock); vp->v_flag &= ~VMOUNT; mtx_unlock(&vp->v_interlock); mp->mnt_vfc->vfc_refcount--; vfs_unbusy(mp, td); free((caddr_t)mp, M_MOUNT); vput(vp); } return (error); } /* * Scan all active processes to see if any of them have a current * or root directory of `olddp'. If so, replace them with the new * mount point. */ static void checkdirs(olddp, newdp) struct vnode *olddp, *newdp; { struct filedesc *fdp; struct proc *p; int nrele; if (olddp->v_usecount == 1) return; sx_slock(&allproc_lock); LIST_FOREACH(p, &allproc, p_list) { PROC_LOCK(p); fdp = p->p_fd; if (fdp == NULL) { PROC_UNLOCK(p); continue; } nrele = 0; FILEDESC_LOCK(fdp); if (fdp->fd_cdir == olddp) { VREF(newdp); fdp->fd_cdir = newdp; nrele++; } if (fdp->fd_rdir == olddp) { VREF(newdp); fdp->fd_rdir = newdp; nrele++; } FILEDESC_UNLOCK(fdp); PROC_UNLOCK(p); while (nrele--) vrele(olddp); } sx_sunlock(&allproc_lock); if (rootvnode == olddp) { vrele(rootvnode); VREF(newdp); rootvnode = newdp; } } /* * Unmount a file system. * * Note: unmount takes a path to the vnode mounted on as argument, * not special file (as before). */ #ifndef _SYS_SYSPROTO_H_ struct unmount_args { char *path; int flags; }; #endif /* ARGSUSED */ int unmount(td, uap) struct thread *td; register struct unmount_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct vnode *vp; struct mount *mp; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); mp = vp->v_mount; /* * Only root, or the user that did the original mount is * permitted to unmount this filesystem. */ if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) { error = suser(td); if (error) { vput(vp); return (error); } } /* * Don't allow unmounting the root file system. */ if (mp->mnt_flag & MNT_ROOTFS) { vput(vp); return (EINVAL); } /* * Must be the root of the filesystem */ if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } vput(vp); return (dounmount(mp, SCARG(uap, flags), td)); } /* * Do the actual file system unmount. */ int dounmount(mp, flags, td) struct mount *mp; int flags; struct thread *td; { struct vnode *coveredvp, *fsrootvp; int error; int async_flag; mtx_lock(&mountlist_mtx); if (mp->mnt_kern_flag & MNTK_UNMOUNT) { mtx_unlock(&mountlist_mtx); return (EBUSY); } mp->mnt_kern_flag |= MNTK_UNMOUNT; /* Allow filesystems to detect that a forced unmount is in progress. */ if (flags & MNT_FORCE) mp->mnt_kern_flag |= MNTK_UNMOUNTF; error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK | ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td); if (error) { mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup((caddr_t)mp); return (error); } vn_start_write(NULL, &mp, V_WAIT); if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); vfs_msync(mp, MNT_WAIT); async_flag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &=~ MNT_ASYNC; cache_purgevfs(mp); /* remove cache entries for this file sys */ if (mp->mnt_syncer != NULL) vrele(mp->mnt_syncer); /* Move process cdir/rdir refs on fs root to underlying vnode. */ if (VFS_ROOT(mp, &fsrootvp) == 0) { if (mp->mnt_vnodecovered != NULL) checkdirs(fsrootvp, mp->mnt_vnodecovered); if (fsrootvp == rootvnode) { vrele(rootvnode); rootvnode = NULL; } vput(fsrootvp); } if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) || (flags & MNT_FORCE)) { error = VFS_UNMOUNT(mp, flags, td); } vn_finished_write(mp); if (error) { /* Undo cdir/rdir and rootvnode changes made above. */ if (VFS_ROOT(mp, &fsrootvp) == 0) { if (mp->mnt_vnodecovered != NULL) checkdirs(mp->mnt_vnodecovered, fsrootvp); if (rootvnode == NULL) { rootvnode = fsrootvp; vref(rootvnode); } vput(fsrootvp); } if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) (void) vfs_allocate_syncvnode(mp); mtx_lock(&mountlist_mtx); mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); mp->mnt_flag |= async_flag; lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup((caddr_t)mp); return (error); } mtx_lock(&mountlist_mtx); TAILQ_REMOVE(&mountlist, mp, mnt_list); if ((coveredvp = mp->mnt_vnodecovered) != NULL) coveredvp->v_mountedhere = NULL; mp->mnt_vfc->vfc_refcount--; if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) panic("unmount: dangling vnode"); lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td); lockdestroy(&mp->mnt_lock); if (coveredvp != NULL) vrele(coveredvp); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup((caddr_t)mp); if (mp->mnt_op->vfs_mount == NULL) vfs_freeopts(mp->mnt_opt); free((caddr_t)mp, M_MOUNT); return (0); } /* * Sync each mounted filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif #ifdef DEBUG static int syncprt = 0; SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); #endif /* ARGSUSED */ int sync(td, uap) struct thread *td; struct sync_args *uap; { struct mount *mp, *nmp; int asyncflag; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0 && vn_start_write(NULL, &mp, V_NOWAIT) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT, ((td != NULL) ? td->td_ucred : NOCRED), td); mp->mnt_flag |= asyncflag; vn_finished_write(mp); } mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } mtx_unlock(&mountlist_mtx); #if 0 /* * XXX don't call vfs_bufstats() yet because that routine * was not imported in the Lite2 merge. */ #ifdef DIAGNOSTIC if (syncprt) vfs_bufstats(); #endif /* DIAGNOSTIC */ #endif return (0); } /* XXX PRISON: could be per prison flag */ static int prison_quotas; #if 0 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); #endif /* * Change filesystem quotas. */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif /* ARGSUSED */ int quotactl(td, uap) struct thread *td; register struct quotactl_args /* { syscallarg(char *) path; syscallarg(int) cmd; syscallarg(int) uid; syscallarg(caddr_t) arg; } */ *uap; { struct mount *mp; int error; struct nameidata nd; if (jailed(td->td_ucred) && !prison_quotas) return (EPERM); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH); vrele(nd.ni_vp); if (error) return (error); error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), SCARG(uap, arg), td); vn_finished_write(mp); return (error); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif /* ARGSUSED */ int statfs(td, uap) struct thread *td; register struct statfs_args /* { syscallarg(char *) path; syscallarg(struct statfs *) buf; } */ *uap; { register struct mount *mp; register struct statfs *sp; int error; struct nameidata nd; struct statfs sb; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); mp = nd.ni_vp->v_mount; sp = &mp->mnt_stat; NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); error = VFS_STATFS(mp, sp, td); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (suser(td)) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif /* ARGSUSED */ int fstatfs(td, uap) struct thread *td; register struct fstatfs_args /* { syscallarg(int) fd; syscallarg(struct statfs *) buf; } */ *uap; { struct file *fp; struct mount *mp; register struct statfs *sp; int error; struct statfs sb; if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; fdrop(fp, td); if (mp == NULL) return (EBADF); sp = &mp->mnt_stat; error = VFS_STATFS(mp, sp, td); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (suser(td)) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int flags; }; #endif int getfsstat(td, uap) struct thread *td; register struct getfsstat_args /* { syscallarg(struct statfs *) buf; syscallarg(long) bufsize; syscallarg(int) flags; } */ *uap; { register struct mount *mp, *nmp; register struct statfs *sp; caddr_t sfsp; long count, maxcount, error; maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); sfsp = (caddr_t)SCARG(uap, buf); count = 0; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* * If MNT_NOWAIT or MNT_LAZY is specified, do not * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY * overrides MNT_WAIT. */ if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || (SCARG(uap, flags) & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, td))) { mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); continue; } sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = copyout((caddr_t)sp, sfsp, sizeof(*sp)); if (error) { vfs_unbusy(mp, td); return (error); } sfsp += sizeof(*sp); } count++; mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } mtx_unlock(&mountlist_mtx); if (sfsp && count > maxcount) td->td_retval[0] = maxcount; else td->td_retval[0] = count; return (0); } /* * Change current working directory to a given file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif /* ARGSUSED */ int fchdir(td, uap) struct thread *td; struct fchdir_args /* { syscallarg(int) fd; } */ *uap; { register struct filedesc *fdp = td->td_proc->p_fd; struct vnode *vp, *tdp, *vpold; struct mount *mp; struct file *fp; int error; if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0) return (error); vp = (struct vnode *)fp->f_data; VREF(vp); fdrop(fp, td); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); while (!error && (mp = vp->v_mountedhere) != NULL) { if (vfs_busy(mp, 0, 0, td)) continue; error = VFS_ROOT(mp, &tdp); vfs_unbusy(mp, td); if (error) break; vput(vp); vp = tdp; } if (error) { vput(vp); return (error); } VOP_UNLOCK(vp, 0, td); FILEDESC_LOCK(fdp); vpold = fdp->fd_cdir; fdp->fd_cdir = vp; FILEDESC_UNLOCK(fdp); vrele(vpold); return (0); } /* * Change current working directory (``.''). */ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif /* ARGSUSED */ int chdir(td, uap) struct thread *td; struct chdir_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; struct vnode *vp; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), td); if ((error = change_dir(&nd, td)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); FILEDESC_LOCK(fdp); vp = fdp->fd_cdir; fdp->fd_cdir = nd.ni_vp; FILEDESC_UNLOCK(fdp); vrele(vp); return (0); } /* * Helper function for raised chroot(2) security function: Refuse if * any filedescriptors are open directories. */ static int chroot_refuse_vdir_fds(fdp) struct filedesc *fdp; { struct vnode *vp; struct file *fp; int fd; FILEDESC_LOCK(fdp); for (fd = 0; fd < fdp->fd_nfiles ; fd++) { fp = fget_locked(fdp, fd); if (fp == NULL) continue; if (fp->f_type == DTYPE_VNODE) { vp = (struct vnode *)fp->f_data; if (vp->v_type == VDIR) { FILEDESC_UNLOCK(fdp); return (EPERM); } } } FILEDESC_UNLOCK(fdp); return (0); } /* * This sysctl determines if we will allow a process to chroot(2) if it * has a directory open: * 0: disallowed for all processes. * 1: allowed for processes that were not already chroot(2)'ed. * 2: allowed for all processes. */ static int chroot_allow_open_directories = 1; SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, &chroot_allow_open_directories, 0, ""); /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif /* ARGSUSED */ int chroot(td, uap) struct thread *td; struct chroot_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; struct vnode *vp; error = suser_cred(td->td_ucred, PRISON_ROOT); if (error) return (error); FILEDESC_LOCK(fdp); if (chroot_allow_open_directories == 0 || (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { FILEDESC_UNLOCK(fdp); error = chroot_refuse_vdir_fds(fdp); } else FILEDESC_UNLOCK(fdp); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), td); if ((error = change_dir(&nd, td)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); FILEDESC_LOCK(fdp); vp = fdp->fd_rdir; fdp->fd_rdir = nd.ni_vp; if (!fdp->fd_jdir) { fdp->fd_jdir = nd.ni_vp; VREF(fdp->fd_jdir); } FILEDESC_UNLOCK(fdp); vrele(vp); return (0); } /* * Common routine for chroot and chdir. */ static int change_dir(ndp, td) register struct nameidata *ndp; struct thread *td; { struct vnode *vp; int error; error = namei(ndp); if (error) return (error); vp = ndp->ni_vp; if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) vput(vp); else VOP_UNLOCK(vp, 0, td); return (error); } /* * Check permissions, allocate an open file structure, * and call the device open routine if any. */ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int open(td, uap) struct thread *td; register struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ *uap; { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; struct vattr vat; struct mount *mp; int cmode, flags, oflags; struct file *nfp; int type, indx, error; struct flock lf; struct nameidata nd; oflags = SCARG(uap, flags); if ((oflags & O_ACCMODE) == O_ACCMODE) return (EINVAL); flags = FFLAGS(oflags); error = falloc(td, &nfp, &indx); if (error) return (error); fp = nfp; FILEDESC_LOCK(fdp); cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; FILEDESC_UNLOCK(fdp); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); td->td_dupfd = -indx - 1; /* XXX check for fdopen */ /* * Bump the ref count to prevent another process from closing * the descriptor while we are blocked in vn_open() */ fhold(fp); error = vn_open(&nd, &flags, cmode); if (error) { /* * release our own reference */ fdrop(fp, td); /* * handle special fdopen() case. bleh. dupfdopen() is * responsible for dropping the old contents of ofiles[indx] * if it succeeds. */ if ((error == ENODEV || error == ENXIO) && td->td_dupfd >= 0 && /* XXX from fdopen */ (error = dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) { td->td_retval[0] = indx; return (0); } /* * Clean up the descriptor, but only if another thread hadn't * replaced or closed it. */ FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; FILEDESC_UNLOCK(fdp); fdrop(fp, td); } else FILEDESC_UNLOCK(fdp); if (error == ERESTART) error = EINTR; return (error); } td->td_dupfd = 0; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; /* * There should be 2 references on the file, one from the descriptor * table, and one for us. * * Handle the case where someone closed the file (via its file * descriptor) while we were blocked. The end result should look * like opening the file succeeded but it was immediately closed. */ FILEDESC_LOCK(fdp); FILE_LOCK(fp); if (fp->f_count == 1) { KASSERT(fdp->fd_ofiles[indx] != fp, ("Open file descriptor lost all refs")); FILEDESC_UNLOCK(fdp); FILE_UNLOCK(fp); VOP_UNLOCK(vp, 0, td); vn_close(vp, flags & FMASK, fp->f_cred, td); fdrop(fp, td); td->td_retval[0] = indx; return 0; } fp->f_data = (caddr_t)vp; fp->f_flag = flags & FMASK; fp->f_ops = &vnops; fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); FILEDESC_UNLOCK(fdp); FILE_UNLOCK(fp); VOP_UNLOCK(vp, 0, td); if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (flags & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) goto bad; fp->f_flag |= FHASLOCK; } if (flags & O_TRUNC) { if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto bad; VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); VATTR_NULL(&vat); vat.va_size = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); error = VOP_SETATTR(vp, &vat, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); if (error) goto bad; } /* assert that vn_open created a backing object if one is needed */ KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0, ("open: vmio vnode has no backing object after vn_open")); /* * Release our private reference, leaving the one associated with * the descriptor table intact. */ fdrop(fp, td); td->td_retval[0] = indx; return (0); bad: FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; FILEDESC_UNLOCK(fdp); fdrop(fp, td); } else FILEDESC_UNLOCK(fdp); return (error); } #ifdef COMPAT_43 /* * Create a file. */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int ocreat(td, uap) struct thread *td; register struct ocreat_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, mode) = SCARG(uap, mode); SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC; return (open(td, &nuap)); } #endif /* COMPAT_43 */ /* * Create a special file. */ #ifndef _SYS_SYSPROTO_H_ struct mknod_args { char *path; int mode; int dev; }; #endif /* ARGSUSED */ int mknod(td, uap) struct thread *td; register struct mknod_args /* { syscallarg(char *) path; syscallarg(int) mode; syscallarg(int) dev; } */ *uap; { struct vnode *vp; struct mount *mp; struct vattr vattr; int error; int whiteout = 0; struct nameidata nd; switch (SCARG(uap, mode) & S_IFMT) { case S_IFCHR: case S_IFBLK: error = suser(td); break; default: error = suser_cred(td->td_ucred, PRISON_ROOT); break; } if (error) return (error); restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp != NULL) { vrele(vp); error = EEXIST; } else { VATTR_NULL(&vattr); FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK(td->td_proc->p_fd); vattr.va_rdev = SCARG(uap, dev); whiteout = 0; switch (SCARG(uap, mode) & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: error = EINVAL; break; } } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } if (!error) { VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); if (whiteout) error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); } } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod"); return (error); } /* * Create a named pipe. */ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkfifo(td, uap) struct thread *td; register struct mkfifo_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct mount *mp; struct vattr vattr; int error; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); vput(nd.ni_dvp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VFIFO; FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); return (error); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif /* ARGSUSED */ int link(td, uap) struct thread *td; register struct link_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { struct vnode *vp; struct mount *mp; struct nameidata nd; int error; bwillwrite(); NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (vp->v_type == VDIR) { vrele(vp); return (EPERM); /* POSIX */ } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); return (error); } NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td); if ((error = namei(&nd)) == 0) { if (nd.ni_vp != NULL) { vrele(nd.ni_vp); error = EEXIST; } else { VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); } vrele(vp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "link"); return (error); } /* * Make a symbolic link. */ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif /* ARGSUSED */ int symlink(td, uap) struct thread *td; register struct symlink_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { struct mount *mp; struct vattr vattr; char *path; int error; struct nameidata nd; path = uma_zalloc(namei_zone, M_WAITOK); if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0) goto out; restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td); if ((error = namei(&nd)) != 0) goto out; if (nd.ni_vp) { NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); vput(nd.ni_dvp); error = EEXIST; goto out; } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); NDFREE(&nd, NDF_ONLY_PNBUF); if (error == 0) vput(nd.ni_vp); vput(nd.ni_dvp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink"); out: uma_zfree(namei_zone, path); return (error); } /* * Delete a whiteout from the filesystem. */ /* ARGSUSED */ int undelete(td, uap) struct thread *td; register struct undelete_args /* { syscallarg(char *) path; } */ *uap; { int error; struct mount *mp; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, SCARG(uap, path), td); error = namei(&nd); if (error) return (error); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp) vrele(nd.ni_vp); vput(nd.ni_dvp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); return (error); } /* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif /* ARGSUSED */ int unlink(td, uap) struct thread *td; struct unlink_args /* { syscallarg(char *) path; } */ *uap; { struct mount *mp; struct vnode *vp; int error; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { /* * The root of a mounted filesystem cannot be deleted. * * XXX: can this only be a VDIR case? */ if (vp->v_flag & VROOT) error = EBUSY; } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vrele(vp); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (!error) { VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vput(vp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink"); return (error); } /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int lseek(td, uap) struct thread *td; register struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ *uap; { struct ucred *cred = td->td_ucred; struct file *fp; struct vnode *vp; struct vattr vattr; off_t offset; int error, noneg; if ((error = fget(td, uap->fd, &fp)) != 0) return (error); if (fp->f_type != DTYPE_VNODE) { fdrop(fp, td); return (ESPIPE); } vp = (struct vnode *)fp->f_data; noneg = (vp->v_type != VCHR); offset = SCARG(uap, offset); switch (SCARG(uap, whence)) { case L_INCR: if (noneg && (fp->f_offset < 0 || (offset > 0 && fp->f_offset > OFF_MAX - offset))) return (EOVERFLOW); offset += fp->f_offset; break; case L_XTND: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); error = VOP_GETATTR(vp, &vattr, cred, td); VOP_UNLOCK(vp, 0, td); if (error) return (error); if (noneg && (vattr.va_size > OFF_MAX || (offset > 0 && vattr.va_size > OFF_MAX - offset))) return (EOVERFLOW); offset += vattr.va_size; break; case L_SET: break; default: fdrop(fp, td); return (EINVAL); } if (noneg && offset < 0) return (EINVAL); fp->f_offset = offset; *(off_t *)(td->td_retval) = fp->f_offset; fdrop(fp, td); return (0); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(td, uap) struct thread *td; register struct olseek_args /* { syscallarg(int) fd; syscallarg(long) offset; syscallarg(int) whence; } */ *uap; { struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ nuap; int error; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, offset) = SCARG(uap, offset); SCARG(&nuap, whence) = SCARG(uap, whence); error = lseek(td, &nuap); return (error); } #endif /* COMPAT_43 */ /* * Check access permissions using passed credentials. */ static int vn_access(vp, user_flags, cred, td) struct vnode *vp; int user_flags; struct ucred *cred; struct thread *td; { int error, flags; /* Flags == 0 means only check for existence. */ error = 0; if (user_flags) { flags = 0; if (user_flags & R_OK) flags |= VREAD; if (user_flags & W_OK) flags |= VWRITE; if (user_flags & X_OK) flags |= VEXEC; if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, flags, cred, td); } return (error); } /* * Check access permissions using "real" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int flags; }; #endif int access(td, uap) struct thread *td; register struct access_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { struct ucred *cred, *tmpcred; register struct vnode *vp; int error; struct nameidata nd; /* * Create and modify a temporary credential instead of one that * is potentially shared. This could also mess up socket * buffer accounting which can run in an interrupt context. * * XXX - Depending on how "threads" are finally implemented, it * may be better to explicitly pass the credential to namei() * rather than to modify the potentially shared process structure. */ cred = td->td_ucred; tmpcred = crdup(cred); tmpcred->cr_uid = cred->cr_ruid; tmpcred->cr_groups[0] = cred->cr_rgid; td->td_ucred = tmpcred; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) goto out1; vp = nd.ni_vp; error = vn_access(vp, SCARG(uap, flags), tmpcred, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); out1: td->td_ucred = cred; crfree(tmpcred); return (error); } /* * Check access permissions using "effective" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct eaccess_args { char *path; int flags; }; #endif int eaccess(td, uap) struct thread *td; register struct eaccess_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { struct nameidata nd; struct vnode *vp; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int ostat(td, uap) struct thread *td; register struct ostat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_stat(nd.ni_vp, &sb, td); vput(nd.ni_vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int olstat(td, uap) struct thread *td; register struct olstat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct vnode *vp; struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Convert from an old to a new stat structure. */ void cvtstat(st, ost) struct stat *st; struct ostat *ost; { ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (st->st_size < (quad_t)1 << 32) ost->st_size = st->st_size; else ost->st_size = -2; ost->st_atime = st->st_atime; ost->st_mtime = st->st_mtime; ost->st_ctime = st->st_ctime; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct stat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int stat(td, uap) struct thread *td; register struct stat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { struct stat sb; int error; struct nameidata nd; #ifndef LOOKUP_EXCLUSIVE NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), td); #else NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), td); #endif if ((error = namei(&nd)) != 0) return (error); error = vn_stat(nd.ni_vp, &sb, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int lstat(td, uap) struct thread *td; register struct lstat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { int error; struct vnode *vp; struct stat sb; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Implementation of the NetBSD stat() function. * XXX This should probably be collapsed with the FreeBSD version, * as the differences are only due to vn_stat() clearing spares at * the end of the structures. vn_stat could be split to avoid this, * and thus collapse the following to close to zero code. */ void cvtnstat(sb, nsb) struct stat *sb; struct nstat *nsb; { nsb->st_dev = sb->st_dev; nsb->st_ino = sb->st_ino; nsb->st_mode = sb->st_mode; nsb->st_nlink = sb->st_nlink; nsb->st_uid = sb->st_uid; nsb->st_gid = sb->st_gid; nsb->st_rdev = sb->st_rdev; nsb->st_atimespec = sb->st_atimespec; nsb->st_mtimespec = sb->st_mtimespec; nsb->st_ctimespec = sb->st_ctimespec; nsb->st_size = sb->st_size; nsb->st_blocks = sb->st_blocks; nsb->st_blksize = sb->st_blksize; nsb->st_flags = sb->st_flags; nsb->st_gen = sb->st_gen; nsb->st_qspare[0] = sb->st_qspare[0]; nsb->st_qspare[1] = sb->st_qspare[1]; } #ifndef _SYS_SYSPROTO_H_ struct nstat_args { char *path; struct nstat *ub; }; #endif /* ARGSUSED */ int nstat(td, uap) struct thread *td; register struct nstat_args /* { syscallarg(char *) path; syscallarg(struct nstat *) ub; } */ *uap; { struct stat sb; struct nstat nsb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_stat(nd.ni_vp, &sb, td); vput(nd.ni_vp); if (error) return (error); cvtnstat(&sb, &nsb); error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb)); return (error); } /* * NetBSD lstat. Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int nlstat(td, uap) struct thread *td; register struct nlstat_args /* { syscallarg(char *) path; syscallarg(struct nstat *) ub; } */ *uap; { int error; struct vnode *vp; struct stat sb; struct nstat nsb; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_stat(vp, &sb, td); vput(vp); if (error) return (error); cvtnstat(&sb, &nsb); error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb)); return (error); } /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif /* ARGSUSED */ int pathconf(td, uap) struct thread *td; register struct pathconf_args /* { syscallarg(char *) path; syscallarg(int) name; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval); vput(nd.ni_vp); return (error); } /* * Return target name of a symbolic link. */ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; int count; }; #endif /* ARGSUSED */ int readlink(td, uap) struct thread *td; register struct readlink_args /* { syscallarg(char *) path; syscallarg(char *) buf; syscallarg(int) count; } */ *uap; { register struct vnode *vp; struct iovec aiov; struct uio auio; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (vp->v_type != VLNK) error = EINVAL; else { aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auio.uio_resid = SCARG(uap, count); error = VOP_READLINK(vp, &auio, td->td_ucred); } vput(vp); td->td_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } /* * Common implementation code for chflags() and fchflags(). */ static int setfflags(td, vp, flags) struct thread *td; struct vnode *vp; int flags; { int error; struct mount *mp; struct vattr vattr; /* * Prevent non-root users from setting flags on devices. When * a device is reused, users can retain ownership of the device * if they are allowed to set flags and programs assume that * chown can't fail when done as root. */ if (vp->v_type == VCHR || vp->v_type == VBLK) { error = suser_cred(td->td_ucred, PRISON_ROOT); if (error) return (error); } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_flags = flags; error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } /* * Change flags of a file given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { char *path; int flags; }; #endif /* ARGSUSED */ int chflags(td, uap) struct thread *td; register struct chflags_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfflags(td, nd.ni_vp, SCARG(uap, flags)); vrele(nd.ni_vp); return error; } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; int flags; }; #endif /* ARGSUSED */ int fchflags(td, uap) struct thread *td; register struct fchflags_args /* { syscallarg(int) fd; syscallarg(int) flags; } */ *uap; { struct file *fp; int error; if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags)); fdrop(fp, td); return (error); } /* * Common implementation code for chmod(), lchmod() and fchmod(). */ static int setfmode(td, vp, mode) struct thread *td; struct vnode *vp; int mode; { int error; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_mode = mode & ALLPERMS; error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return error; } /* * Change mode of a file given path name. */ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif /* ARGSUSED */ int chmod(td, uap) struct thread *td; register struct chmod_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfmode(td, nd.ni_vp, SCARG(uap, mode)); vrele(nd.ni_vp); return error; } /* * Change mode of a file given path name (don't follow links.) */ #ifndef _SYS_SYSPROTO_H_ struct lchmod_args { char *path; int mode; }; #endif /* ARGSUSED */ int lchmod(td, uap) struct thread *td; register struct lchmod_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfmode(td, nd.ni_vp, SCARG(uap, mode)); vrele(nd.ni_vp); return error; } /* * Change mode of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif /* ARGSUSED */ int fchmod(td, uap) struct thread *td; register struct fchmod_args /* { syscallarg(int) fd; syscallarg(int) mode; } */ *uap; { struct file *fp; struct vnode *vp; int error; if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); vp = (struct vnode *)fp->f_data; error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode)); fdrop(fp, td); return (error); } /* * Common implementation for chown(), lchown(), and fchown() */ static int setfown(td, vp, uid, gid) struct thread *td; struct vnode *vp; uid_t uid; gid_t gid; { int error; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_uid = uid; vattr.va_gid = gid; error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return error; } /* * Set ownership given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int chown(td, uap) struct thread *td; register struct chown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid)); vrele(nd.ni_vp); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int lchown(td, uap) struct thread *td; register struct lchown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid)); vrele(nd.ni_vp); return (error); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif /* ARGSUSED */ int fchown(td, uap) struct thread *td; register struct fchown_args /* { syscallarg(int) fd; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { struct file *fp; struct vnode *vp; int error; if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); vp = (struct vnode *)fp->f_data; error = setfown(td, (struct vnode *)fp->f_data, SCARG(uap, uid), SCARG(uap, gid)); fdrop(fp, td); return (error); } /* * Common implementation code for utimes(), lutimes(), and futimes(). */ static int getutimes(usrtvp, tsp) const struct timeval *usrtvp; struct timespec *tsp; { struct timeval tv[2]; int error; if (usrtvp == NULL) { microtime(&tv[0]); TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); tsp[1] = tsp[0]; } else { if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0) return (error); TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]); } return 0; } /* * Common implementation code for utimes(), lutimes(), and futimes(). */ static int setutimes(td, vp, ts, nullflag) struct thread *td; struct vnode *vp; const struct timespec *ts; int nullflag; { int error; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_atime = ts[0]; vattr.va_mtime = ts[1]; if (nullflag) vattr.va_vaflags |= VA_UTIMES_NULL; error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return error; } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif /* ARGSUSED */ int utimes(td, uap) struct thread *td; register struct utimes_args /* { syscallarg(char *) path; syscallarg(struct timeval *) tptr; } */ *uap; { struct timespec ts[2]; struct timeval *usrtvp; int error; struct nameidata nd; usrtvp = SCARG(uap, tptr); if ((error = getutimes(usrtvp, ts)) != 0) return (error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL); vrele(nd.ni_vp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct lutimes_args { char *path; struct timeval *tptr; }; #endif /* ARGSUSED */ int lutimes(td, uap) struct thread *td; register struct lutimes_args /* { syscallarg(char *) path; syscallarg(struct timeval *) tptr; } */ *uap; { struct timespec ts[2]; struct timeval *usrtvp; int error; struct nameidata nd; usrtvp = SCARG(uap, tptr); if ((error = getutimes(usrtvp, ts)) != 0) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL); vrele(nd.ni_vp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct futimes_args { int fd; struct timeval *tptr; }; #endif /* ARGSUSED */ int futimes(td, uap) struct thread *td; register struct futimes_args /* { syscallarg(int ) fd; syscallarg(struct timeval *) tptr; } */ *uap; { struct timespec ts[2]; struct file *fp; struct timeval *usrtvp; int error; usrtvp = SCARG(uap, tptr); if ((error = getutimes(usrtvp, ts)) != 0) return (error); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL); fdrop(fp, td); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif /* ARGSUSED */ int truncate(td, uap) struct thread *td; register struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { struct mount *mp; struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; if (uap->length < 0) return(EINVAL); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); } vput(vp); vn_finished_write(mp); return (error); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ftruncate_args { int fd; int pad; off_t length; }; #endif /* ARGSUSED */ int ftruncate(td, uap) struct thread *td; register struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { struct mount *mp; struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (uap->length < 0) return(EINVAL); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); if ((fp->f_flag & FWRITE) == 0) { fdrop(fp, td); return (EINVAL); } vp = (struct vnode *)fp->f_data; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { fdrop(fp, td); return (error); } VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, fp->f_cred, td); } VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); fdrop(fp, td); return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif /* ARGSUSED */ int otruncate(td, uap) struct thread *td; register struct otruncate_args /* { syscallarg(char *) path; syscallarg(long) length; } */ *uap; { struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, length) = SCARG(uap, length); return (truncate(td, &nuap)); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct oftruncate_args { int fd; long length; }; #endif /* ARGSUSED */ int oftruncate(td, uap) struct thread *td; register struct oftruncate_args /* { syscallarg(int) fd; syscallarg(long) length; } */ *uap; { struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, length) = SCARG(uap, length); return (ftruncate(td, &nuap)); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif /* ARGSUSED */ int fsync(td, uap) struct thread *td; struct fsync_args /* { syscallarg(int) fd; } */ *uap; { struct vnode *vp; struct mount *mp; struct file *fp; vm_object_t obj; int error; GIANT_REQUIRED; if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); vp = (struct vnode *)fp->f_data; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { fdrop(fp, td); return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (VOP_GETVOBJECT(vp, &obj) == 0) { vm_object_page_clean(obj, 0, 0, 0); } error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td); #ifdef SOFTUPDATES if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) error = softdep_fsync(vp); #endif VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); fdrop(fp, td); return (error); } /* * Rename files. Source and destination must either both be directories, * or both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif /* ARGSUSED */ int rename(td, uap) struct thread *td; register struct rename_args /* { syscallarg(char *) from; syscallarg(char *) to; } */ *uap; { struct mount *mp; struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int error; bwillwrite(); NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, SCARG(uap, from), td); if ((error = namei(&fromnd)) != 0) return (error); fvp = fromnd.ni_vp; if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) { NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, UIO_USERSPACE, SCARG(uap, to), td); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&tond)) != 0) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } } if (fvp == tdvp) error = EINVAL; /* * If source is the same as the destination (that is the * same inode number with the same name in the same directory), * then there is nothing to do. */ if (fvp == tvp && fromnd.ni_dvp == tdvp && fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen)) error = -1; out: if (!error) { VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE); if (fromnd.ni_dvp != tdvp) { VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE); } if (tvp) { VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); } else { NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename"); out1: if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); if (error == -1) return (0); return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkdir(td, uap) struct thread *td; register struct mkdir_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td); } int vn_mkdir(path, mode, segflg, td) char *path; int mode; enum uio_seg segflg; struct thread *td; { struct mount *mp; struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT, segflg, path, td); nd.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); vrele(vp); vput(nd.ni_dvp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VDIR; FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (!error) vput(nd.ni_vp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir"); return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif /* ARGSUSED */ int rmdir(td, uap) struct thread *td; struct rmdir_args /* { syscallarg(char *) path; } */ *uap; { struct mount *mp; struct vnode *vp; int error; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_flag & VROOT) { error = EBUSY; goto out; } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vput(vp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); vn_finished_write(mp); out: NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vput(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir"); return (error); } #ifdef COMPAT_43 /* * Read a block of directory entries in a file system independent format. */ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(td, uap) struct thread *td; register struct ogetdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { struct vnode *vp; struct file *fp; struct uio auio, kuio; struct iovec aiov, kiov; struct dirent *dp, *edp; caddr_t dirbuf; int error, eofflag, readcnt; long loff; /* XXX arbitrary sanity limit on `count'. */ if (SCARG(uap, count) > 64 * 1024) return (EINVAL); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) { fdrop(fp, td); return (EINVAL); } aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auio.uio_resid = SCARG(uap, count); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); loff = auio.uio_offset = fp->f_offset; # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; } else # endif { kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; kiov.iov_len = SCARG(uap, count); MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = kuio.uio_offset; if (error == 0) { readcnt = SCARG(uap, count) - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) /* * The expected low byte of * dp->d_namlen is our dp->d_type. * The high MBZ byte of dp->d_namlen * is our dp->d_namlen. */ dp->d_type = dp->d_namlen; dp->d_namlen = 0; # else /* * The dp->d_type is the high byte * of the expected dp->d_namlen, * so must be zero'ed. */ dp->d_type = 0; # endif if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, &auio); } FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, td); if (error) { fdrop(fp, td); return (error); } if (SCARG(uap, count) == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; if (error) { fdrop(fp, td); return (error); } } if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); fdrop(fp, td); td->td_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } #endif /* COMPAT_43 */ /* * Read a block of directory entries in a file system independent format. */ #ifndef _SYS_SYSPROTO_H_ struct getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int getdirentries(td, uap) struct thread *td; register struct getdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; long loff; int error, eofflag; if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) { fdrop(fp, td); return (EINVAL); } aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auio.uio_resid = SCARG(uap, count); /* vn_lock(vp, LK_SHARED | LK_RETRY, td); */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); loff = auio.uio_offset = fp->f_offset; error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, td); if (error) { fdrop(fp, td); return (error); } if (SCARG(uap, count) == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; if (error) { fdrop(fp, td); return (error); } } if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } } if (SCARG(uap, basep) != NULL) { error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); } td->td_retval[0] = SCARG(uap, count) - auio.uio_resid; fdrop(fp, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct getdents_args { int fd; char *buf; size_t count; }; #endif int getdents(td, uap) struct thread *td; register struct getdents_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; } */ *uap; { struct getdirentries_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; return getdirentries(td, &ap); } /* * Set the mode mask for creation of filesystem nodes. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int umask(td, uap) struct thread *td; struct umask_args /* { syscallarg(int) newmask; } */ *uap; { register struct filedesc *fdp; FILEDESC_LOCK(td->td_proc->p_fd); fdp = td->td_proc->p_fd; td->td_retval[0] = fdp->fd_cmask; fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; FILEDESC_UNLOCK(td->td_proc->p_fd); return (0); } /* * Void all references to file by ripping underlying filesystem * away from vnode. */ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif /* ARGSUSED */ int revoke(td, uap) struct thread *td; register struct revoke_args /* { syscallarg(char *) path; } */ *uap; { struct mount *mp; struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); if (vp->v_type != VCHR) { vput(vp); return (EINVAL); } error = VOP_GETATTR(vp, &vattr, td->td_ucred, td); if (error) { vput(vp); return (error); } VOP_UNLOCK(vp, 0, td); if (td->td_ucred->cr_uid != vattr.va_uid) { error = suser_cred(td->td_ucred, PRISON_ROOT); if (error) goto out; } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto out; if (vcount(vp) > 1) VOP_REVOKE(vp, REVOKEALL); vn_finished_write(mp); out: vrele(vp); return (error); } /* * Convert a user file descriptor to a kernel file entry. * The file entry is locked upon returning. */ int getvnode(fdp, fd, fpp) struct filedesc *fdp; int fd; struct file **fpp; { int error; struct file *fp; fp = NULL; if (fdp == NULL) error = EBADF; else { FILEDESC_LOCK(fdp); if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) error = EBADF; else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) { fp = NULL; error = EINVAL; } else { fhold(fp); error = 0; } FILEDESC_UNLOCK(fdp); } *fpp = fp; return (error); } /* * Get (NFS) file handle */ #ifndef _SYS_SYSPROTO_H_ struct getfh_args { char *fname; fhandle_t *fhp; }; #endif int getfh(td, uap) struct thread *td; register struct getfh_args *uap; { struct nameidata nd; fhandle_t fh; register struct vnode *vp; int error; /* * Must be super user */ error = suser(td); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; bzero(&fh, sizeof(fh)); fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fh.fh_fid); vput(vp); if (error) return (error); error = copyout(&fh, uap->fhp, sizeof (fh)); return (error); } /* * syscall for the rpc.lockd to use to translate a NFS file handle into * an open descriptor. * * warning: do not remove the suser() call or this becomes one giant * security hole. */ #ifndef _SYS_SYSPROTO_H_ struct fhopen_args { const struct fhandle *u_fhp; int flags; }; #endif int fhopen(td, uap) struct thread *td; struct fhopen_args /* { syscallarg(const struct fhandle *) u_fhp; syscallarg(int) flags; } */ *uap; { struct proc *p = td->td_proc; struct mount *mp; struct vnode *vp; struct fhandle fhp; struct vattr vat; struct vattr *vap = &vat; struct flock lf; struct file *fp; register struct filedesc *fdp = p->p_fd; int fmode, mode, error, type; struct file *nfp; int indx; /* * Must be super user */ error = suser(td); if (error) return (error); fmode = FFLAGS(SCARG(uap, flags)); /* why not allow a non-read/write open for our lockd? */ if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) return (EINVAL); error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp)); if (error) return(error); /* find the mount point */ mp = vfs_getvfs(&fhp.fh_fsid); if (mp == NULL) return (ESTALE); /* now give me my vnode, it gets returned to me locked */ error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp); if (error) return (error); /* * from now on we have to make sure not * to forget about the vnode * any error that causes an abort must vput(vp) * just set error = err and 'goto bad;'. */ /* * from vn_open */ if (vp->v_type == VLNK) { error = EMLINK; goto bad; } if (vp->v_type == VSOCK) { error = EOPNOTSUPP; goto bad; } mode = 0; if (fmode & (FWRITE | O_TRUNC)) { if (vp->v_type == VDIR) { error = EISDIR; goto bad; } error = vn_writechk(vp); if (error) goto bad; mode |= VWRITE; } if (fmode & FREAD) mode |= VREAD; if (mode) { error = VOP_ACCESS(vp, mode, td->td_ucred, td); if (error) goto bad; } if (fmode & O_TRUNC) { VOP_UNLOCK(vp, 0, td); /* XXX */ if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); return (error); } VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* XXX */ VATTR_NULL(vap); vap->va_size = 0; error = VOP_SETATTR(vp, vap, td->td_ucred, td); vn_finished_write(mp); if (error) goto bad; } error = VOP_OPEN(vp, fmode, td->td_ucred, td); if (error) goto bad; /* * Make sure that a VM object is created for VMIO support. */ if (vn_canvmio(vp) == TRUE) { if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0) goto bad; } if (fmode & FWRITE) vp->v_writecount++; /* * end of vn_open code */ if ((error = falloc(td, &nfp, &indx)) != 0) { if (fmode & FWRITE) vp->v_writecount--; goto bad; } fp = nfp; /* * Hold an extra reference to avoid having fp ripped out * from under us while we block in the lock op */ fhold(fp); nfp->f_data = (caddr_t)vp; nfp->f_flag = fmode & FMASK; nfp->f_ops = &vnops; nfp->f_type = DTYPE_VNODE; if (fmode & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (fmode & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((fmode & FNONBLOCK) == 0) type |= F_WAIT; VOP_UNLOCK(vp, 0, td); if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { /* * The lock request failed. Normally close the * descriptor but handle the case where someone might * have dup()d or close()d it when we weren't looking. */ FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; FILEDESC_UNLOCK(fdp); fdrop(fp, td); } else FILEDESC_UNLOCK(fdp); /* * release our private reference */ fdrop(fp, td); return(error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); fp->f_flag |= FHASLOCK; } if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0)) vfs_object_create(vp, td, td->td_ucred); VOP_UNLOCK(vp, 0, td); fdrop(fp, td); td->td_retval[0] = indx; return (0); bad: vput(vp); return (error); } /* * Stat an (NFS) file handle. */ #ifndef _SYS_SYSPROTO_H_ struct fhstat_args { struct fhandle *u_fhp; struct stat *sb; }; #endif int fhstat(td, uap) struct thread *td; register struct fhstat_args /* { syscallarg(struct fhandle *) u_fhp; syscallarg(struct stat *) sb; } */ *uap; { struct stat sb; fhandle_t fh; struct mount *mp; struct vnode *vp; int error; /* * Must be super user */ error = suser(td); if (error) return (error); error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t)); if (error) return (error); if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) return (ESTALE); if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) return (error); error = vn_stat(vp, &sb, td); vput(vp); if (error) return (error); error = copyout(&sb, SCARG(uap, sb), sizeof(sb)); return (error); } /* * Implement fstatfs() for (NFS) file handles. */ #ifndef _SYS_SYSPROTO_H_ struct fhstatfs_args { struct fhandle *u_fhp; struct statfs *buf; }; #endif int fhstatfs(td, uap) struct thread *td; struct fhstatfs_args /* { syscallarg(struct fhandle) *u_fhp; syscallarg(struct statfs) *buf; } */ *uap; { struct statfs *sp; struct mount *mp; struct vnode *vp; struct statfs sb; fhandle_t fh; int error; /* * Must be super user */ error = suser(td); if (error) return (error); if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0) return (error); if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) return (ESTALE); if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) return (error); mp = vp->v_mount; sp = &mp->mnt_stat; vput(vp); if ((error = VFS_STATFS(mp, sp, td)) != 0) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (suser(td)) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout(sp, SCARG(uap, buf), sizeof(*sp))); } /* * Syscall to push extended attribute configuration information into the * VFS. Accepts a path, which it converts to a mountpoint, as well as * a command (int cmd), and attribute name and misc data. For now, the * attribute name is left in userspace for consumption by the VFS_op. * It will probably be changed to be copied into sysspace by the * syscall in the future, once issues with various consumers of the * attribute code have raised their hands. * * Currently this is used only by UFS Extended Attributes. */ int extattrctl(td, uap) struct thread *td; struct extattrctl_args /* { syscallarg(const char *) path; syscallarg(int) cmd; syscallarg(const char *) filename; syscallarg(int) attrnamespace; syscallarg(const char *) attrname; } */ *uap; { struct vnode *filename_vp; struct nameidata nd; struct mount *mp, *mp_writable; char attrname[EXTATTR_MAXNAMELEN]; int error; /* * uap->attrname is not always defined. We check again later when we * invoke the VFS call so as to pass in NULL there if needed. */ if (uap->attrname != NULL) { error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); } /* * uap->filename is not always defined. If it is, grab a vnode lock, * which VFS_EXTATTRCTL() will later release. */ filename_vp = NULL; if (uap->filename != NULL) { NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->filename, td); if ((error = namei(&nd)) != 0) return (error); filename_vp = nd.ni_vp; NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK); } /* uap->path is always defined. */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) { if (filename_vp != NULL) vput(filename_vp); return (error); } mp = nd.ni_vp->v_mount; error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH); NDFREE(&nd, 0); if (error) { if (filename_vp != NULL) vput(filename_vp); return (error); } if (uap->attrname != NULL) { error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace, attrname, td); } else { error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace, NULL, td); } vn_finished_write(mp_writable); /* * VFS_EXTATTRCTL will have unlocked, but not de-ref'd, * filename_vp, so vrele it if it is defined. */ if (filename_vp != NULL) vrele(filename_vp); return (error); } /*- * Set a named extended attribute on a file or directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer "attrname", userspace buffer * pointer "data", buffer length "nbytes", thread "td". * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname, void *data, size_t nbytes, struct thread *td) { struct mount *mp; struct uio auio; struct iovec aiov; ssize_t cnt; int error; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; cnt = nbytes; error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, td->td_ucred, td); cnt -= auio.uio_resid; td->td_retval[0] = cnt; done: VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } int extattr_set_file(td, uap) struct thread *td; struct extattr_set_file_args /* { syscallarg(const char *) path; syscallarg(int) attrnamespace; syscallarg(const char *) attrname; syscallarg(void *) data; syscallarg(size_t) nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } int extattr_set_fd(td, uap) struct thread *td; struct extattr_set_fd_args /* { syscallarg(int) fd; syscallarg(int) attrnamespace; syscallarg(const char *) attrname; syscallarg(void *) data; syscallarg(size_t) nbytes; } */ *uap; { struct file *fp; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); error = extattr_set_vp((struct vnode *)fp->f_data, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); fdrop(fp, td); return (error); } /*- * Get a named extended attribute on a file or directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer "attrname", userspace buffer * pointer "data", buffer length "nbytes", thread "td". * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname, void *data, size_t nbytes, struct thread *td) { struct uio auio; struct iovec aiov; ssize_t cnt; size_t size; int error; VOP_LEASE(vp, td, td->td_ucred, LEASE_READ); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* * Slightly unusual semantics: if the user provides a NULL data * pointer, they don't want to receive the data, just the * maximum read length. */ if (data != NULL) { aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; cnt = nbytes; error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td->td_ucred, td); cnt -= auio.uio_resid; td->td_retval[0] = cnt; } else { error = VOP_GETEXTATTR(vp, attrnamespace, attrname, NULL, &size, td->td_ucred, td); td->td_retval[0] = size; } done: VOP_UNLOCK(vp, 0, td); return (error); } int extattr_get_file(td, uap) struct thread *td; struct extattr_get_file_args /* { syscallarg(const char *) path; syscallarg(int) attrnamespace; syscallarg(const char *) attrname; syscallarg(void *) data; syscallarg(size_t) nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } int extattr_get_fd(td, uap) struct thread *td; struct extattr_get_fd_args /* { syscallarg(int) fd; syscallarg(int) attrnamespace; syscallarg(const char *) attrname; syscallarg(void *) data; syscallarg(size_t) nbytes; } */ *uap; { struct file *fp; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); error = extattr_get_vp((struct vnode *)fp->f_data, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); fdrop(fp, td); return (error); } /* * extattr_delete_vp(): Delete a named extended attribute on a file or * directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer "attrname", proc "p" * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname, struct thread *td) { struct mount *mp; int error; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } int extattr_delete_file(td, uap) struct thread *td; struct extattr_delete_file_args /* { syscallarg(const char *) path; syscallarg(int) attrnamespace; syscallarg(const char *) attrname; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return(error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return(error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td); vrele(nd.ni_vp); return(error); } int extattr_delete_fd(td, uap) struct thread *td; struct extattr_delete_fd_args /* { syscallarg(int) fd; syscallarg(int) attrnamespace; syscallarg(const char *) attrname; } */ *uap; { struct file *fp; struct vnode *vp; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); vp = (struct vnode *)fp->f_data; error = extattr_delete_vp((struct vnode *)fp->f_data, uap->attrnamespace, attrname, td); fdrop(fp, td); return (error); }