2005-01-06 18:10:42 +00:00
|
|
|
/*-
|
2004-10-22 09:59:37 +00:00
|
|
|
* Copyright (c) 2000-2004
|
2000-08-20 21:34:39 +00:00
|
|
|
* Poul-Henning Kamp. All rights reserved.
|
2004-10-22 09:59:37 +00:00
|
|
|
* Copyright (c) 1989, 1992-1993, 1995
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
2000-08-20 21:34:39 +00:00
|
|
|
*
|
|
|
|
* This code is derived from software donated to Berkeley by
|
|
|
|
* Jan-Simon Pendry.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Neither the name of the University nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95
|
|
|
|
* From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
|
|
|
|
*
|
|
|
|
* $FreeBSD$
|
|
|
|
*/
|
|
|
|
|
2001-05-23 17:48:20 +00:00
|
|
|
/*
|
|
|
|
* TODO:
|
|
|
|
* mkdir: want it ?
|
|
|
|
*/
|
|
|
|
|
2000-08-20 21:34:39 +00:00
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
|
|
|
#include <sys/conf.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/dirent.h>
|
2004-10-22 09:59:37 +00:00
|
|
|
#include <sys/fcntl.h>
|
2004-11-08 10:46:47 +00:00
|
|
|
#include <sys/file.h>
|
|
|
|
#include <sys/filedesc.h>
|
|
|
|
#include <sys/filio.h>
|
2009-06-20 14:50:32 +00:00
|
|
|
#include <sys/jail.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/kernel.h>
|
|
|
|
#include <sys/lock.h>
|
2000-08-20 21:34:39 +00:00
|
|
|
#include <sys/malloc.h>
|
Add a new file operations hook for mmap operations. File type-specific
logic is now placed in the mmap hook implementation rather than requiring
it to be placed in sys/vm/vm_mmap.c. This hook allows new file types to
support mmap() as well as potentially allowing mmap() for existing file
types that do not currently support any mapping.
The vm_mmap() function is now split up into two functions. A new
vm_mmap_object() function handles the "back half" of vm_mmap() and accepts
a referenced VM object to map rather than a (handle, handle_type) tuple.
vm_mmap() is now reduced to converting a (handle, handle_type) tuple to
a VM object and then calling vm_mmap_object() to handle the actual mapping.
The vm_mmap() function remains for use by other parts of the kernel
(e.g. device drivers and exec) but now only supports mapping vnodes,
character devices, and anonymous memory.
The mmap() system call invokes vm_mmap_object() directly with a NULL object
for anonymous mappings. For mappings using a file descriptor, the
descriptor's fo_mmap() hook is invoked instead. The fo_mmap() hook is
responsible for performing type-specific checks and adjustments to
arguments as well as possibly modifying mapping parameters such as flags
or the object offset. The fo_mmap() hook routines then call
vm_mmap_object() to handle the actual mapping.
The fo_mmap() hook is optional. If it is not set, then fo_mmap() will
fail with ENODEV. A fo_mmap() hook is implemented for regular files,
character devices, and shared memory objects (created via shm_open()).
While here, consistently use the VM_PROT_* constants for the vm_prot_t
type for the 'prot' variable passed to vm_mmap() and vm_mmap_object()
as well as the vm_mmap_vnode() and vm_mmap_cdev() helper routines.
Previously some places were using the mmap()-specific PROT_* constants
instead. While this happens to work because PROT_xx == VM_PROT_xx,
using VM_PROT_* is more correct.
Differential Revision: https://reviews.freebsd.org/D2658
Reviewed by: alc (glanced over), kib
MFC after: 1 month
Sponsored by: Chelsio
2015-06-04 19:41:15 +00:00
|
|
|
#include <sys/mman.h>
|
2000-08-20 21:34:39 +00:00
|
|
|
#include <sys/mount.h>
|
|
|
|
#include <sys/namei.h>
|
2006-11-06 13:42:10 +00:00
|
|
|
#include <sys/priv.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/proc.h>
|
2004-10-22 09:59:37 +00:00
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/sx.h>
|
2015-03-21 01:14:11 +00:00
|
|
|
#include <sys/sysctl.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/time.h>
|
2004-11-08 10:46:47 +00:00
|
|
|
#include <sys/ttycom.h>
|
2001-11-25 21:00:38 +00:00
|
|
|
#include <sys/unistd.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/vnode.h>
|
2000-08-20 21:34:39 +00:00
|
|
|
|
2005-09-12 08:03:15 +00:00
|
|
|
static struct vop_vector devfs_vnodeops;
|
|
|
|
static struct fileops devfs_ops_f;
|
2002-10-01 10:08:08 +00:00
|
|
|
|
2005-09-12 08:03:15 +00:00
|
|
|
#include <fs/devfs/devfs.h>
|
Revamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liaison
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
#include <fs/devfs/devfs_int.h>
|
|
|
|
|
2006-10-22 11:52:19 +00:00
|
|
|
#include <security/mac/mac_framework.h>
|
|
|
|
|
Add a new file operations hook for mmap operations. File type-specific
logic is now placed in the mmap hook implementation rather than requiring
it to be placed in sys/vm/vm_mmap.c. This hook allows new file types to
support mmap() as well as potentially allowing mmap() for existing file
types that do not currently support any mapping.
The vm_mmap() function is now split up into two functions. A new
vm_mmap_object() function handles the "back half" of vm_mmap() and accepts
a referenced VM object to map rather than a (handle, handle_type) tuple.
vm_mmap() is now reduced to converting a (handle, handle_type) tuple to
a VM object and then calling vm_mmap_object() to handle the actual mapping.
The vm_mmap() function remains for use by other parts of the kernel
(e.g. device drivers and exec) but now only supports mapping vnodes,
character devices, and anonymous memory.
The mmap() system call invokes vm_mmap_object() directly with a NULL object
for anonymous mappings. For mappings using a file descriptor, the
descriptor's fo_mmap() hook is invoked instead. The fo_mmap() hook is
responsible for performing type-specific checks and adjustments to
arguments as well as possibly modifying mapping parameters such as flags
or the object offset. The fo_mmap() hook routines then call
vm_mmap_object() to handle the actual mapping.
The fo_mmap() hook is optional. If it is not set, then fo_mmap() will
fail with ENODEV. A fo_mmap() hook is implemented for regular files,
character devices, and shared memory objects (created via shm_open()).
While here, consistently use the VM_PROT_* constants for the vm_prot_t
type for the 'prot' variable passed to vm_mmap() and vm_mmap_object()
as well as the vm_mmap_vnode() and vm_mmap_cdev() helper routines.
Previously some places were using the mmap()-specific PROT_* constants
instead. While this happens to work because PROT_xx == VM_PROT_xx,
using VM_PROT_* is more correct.
Differential Revision: https://reviews.freebsd.org/D2658
Reviewed by: alc (glanced over), kib
MFC after: 1 month
Sponsored by: Chelsio
2015-06-04 19:41:15 +00:00
|
|
|
#include <vm/vm.h>
|
|
|
|
#include <vm/vm_extern.h>
|
|
|
|
#include <vm/vm_object.h>
|
|
|
|
|
2008-05-21 09:31:44 +00:00
|
|
|
static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data");
|
|
|
|
|
2006-10-18 11:17:14 +00:00
|
|
|
struct mtx devfs_de_interlock;
|
2006-07-12 20:25:35 +00:00
|
|
|
MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);
|
2007-07-03 17:42:37 +00:00
|
|
|
struct sx clone_drain_lock;
|
|
|
|
SX_SYSINIT(clone_drain_lock, &clone_drain_lock, "clone events drain lock");
|
2008-05-21 09:31:44 +00:00
|
|
|
struct mtx cdevpriv_mtx;
|
|
|
|
MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF);
|
2006-07-12 20:25:35 +00:00
|
|
|
|
2015-03-21 01:14:11 +00:00
|
|
|
SYSCTL_DECL(_vfs_devfs);
|
|
|
|
|
|
|
|
static int devfs_dotimes;
|
|
|
|
SYSCTL_INT(_vfs_devfs, OID_AUTO, dotimes, CTLFLAG_RW,
|
2015-04-01 08:25:40 +00:00
|
|
|
&devfs_dotimes, 0, "Update timestamps on DEVFS with default precision");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update devfs node timestamp. Note that updates are unlocked and
|
|
|
|
* stat(2) could see partially updated times.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
devfs_timestamp(struct timespec *tsp)
|
|
|
|
{
|
|
|
|
time_t ts;
|
|
|
|
|
|
|
|
if (devfs_dotimes) {
|
|
|
|
vfs_timestamp(tsp);
|
|
|
|
} else {
|
|
|
|
ts = time_second;
|
|
|
|
if (tsp->tv_sec != ts) {
|
|
|
|
tsp->tv_sec = ts;
|
|
|
|
tsp->tv_nsec = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-03-21 01:14:11 +00:00
|
|
|
|
2004-11-13 23:21:54 +00:00
|
|
|
static int
|
2010-08-06 09:42:15 +00:00
|
|
|
devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp,
|
|
|
|
int *ref)
|
2004-11-13 23:21:54 +00:00
|
|
|
{
|
|
|
|
|
2010-08-06 09:42:15 +00:00
|
|
|
*dswp = devvn_refthread(fp->f_vnode, devp, ref);
|
2006-10-20 07:59:50 +00:00
|
|
|
if (*devp != fp->f_data) {
|
|
|
|
if (*dswp != NULL)
|
2010-08-06 09:42:15 +00:00
|
|
|
dev_relthread(*devp, *ref);
|
2004-11-13 23:21:54 +00:00
|
|
|
return (ENXIO);
|
2006-10-20 07:59:50 +00:00
|
|
|
}
|
2004-11-13 23:21:54 +00:00
|
|
|
KASSERT((*devp)->si_refcount > 0,
|
|
|
|
("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
|
|
|
|
if (*dswp == NULL)
|
|
|
|
return (ENXIO);
|
2008-05-21 09:31:44 +00:00
|
|
|
curthread->td_fpop = fp;
|
2004-11-13 23:21:54 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2008-05-21 09:31:44 +00:00
|
|
|
int
|
|
|
|
devfs_get_cdevpriv(void **datap)
|
|
|
|
{
|
|
|
|
struct file *fp;
|
|
|
|
struct cdev_privdata *p;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
fp = curthread->td_fpop;
|
|
|
|
if (fp == NULL)
|
|
|
|
return (EBADF);
|
|
|
|
p = fp->f_cdevpriv;
|
|
|
|
if (p != NULL) {
|
|
|
|
error = 0;
|
|
|
|
*datap = p->cdpd_data;
|
|
|
|
} else
|
|
|
|
error = ENOENT;
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
devfs_set_cdevpriv(void *priv, cdevpriv_dtr_t priv_dtr)
|
|
|
|
{
|
|
|
|
struct file *fp;
|
|
|
|
struct cdev_priv *cdp;
|
|
|
|
struct cdev_privdata *p;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
fp = curthread->td_fpop;
|
|
|
|
if (fp == NULL)
|
|
|
|
return (ENOENT);
|
2008-06-16 17:34:59 +00:00
|
|
|
cdp = cdev2priv((struct cdev *)fp->f_data);
|
2008-05-21 09:31:44 +00:00
|
|
|
p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK);
|
|
|
|
p->cdpd_data = priv;
|
|
|
|
p->cdpd_dtr = priv_dtr;
|
|
|
|
p->cdpd_fp = fp;
|
|
|
|
mtx_lock(&cdevpriv_mtx);
|
|
|
|
if (fp->f_cdevpriv == NULL) {
|
|
|
|
LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list);
|
|
|
|
fp->f_cdevpriv = p;
|
|
|
|
mtx_unlock(&cdevpriv_mtx);
|
|
|
|
error = 0;
|
|
|
|
} else {
|
|
|
|
mtx_unlock(&cdevpriv_mtx);
|
|
|
|
free(p, M_CDEVPDATA);
|
|
|
|
error = EBUSY;
|
|
|
|
}
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
devfs_destroy_cdevpriv(struct cdev_privdata *p)
|
|
|
|
{
|
|
|
|
|
|
|
|
mtx_assert(&cdevpriv_mtx, MA_OWNED);
|
|
|
|
p->cdpd_fp->f_cdevpriv = NULL;
|
|
|
|
LIST_REMOVE(p, cdpd_list);
|
|
|
|
mtx_unlock(&cdevpriv_mtx);
|
|
|
|
(p->cdpd_dtr)(p->cdpd_data);
|
|
|
|
free(p, M_CDEVPDATA);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
devfs_fpdrop(struct file *fp)
|
|
|
|
{
|
|
|
|
struct cdev_privdata *p;
|
|
|
|
|
|
|
|
mtx_lock(&cdevpriv_mtx);
|
|
|
|
if ((p = fp->f_cdevpriv) == NULL) {
|
|
|
|
mtx_unlock(&cdevpriv_mtx);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
devfs_destroy_cdevpriv(p);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
devfs_clear_cdevpriv(void)
|
|
|
|
{
|
|
|
|
struct file *fp;
|
|
|
|
|
|
|
|
fp = curthread->td_fpop;
|
|
|
|
if (fp == NULL)
|
|
|
|
return;
|
|
|
|
devfs_fpdrop(fp);
|
|
|
|
}
|
|
|
|
|
2010-08-22 16:08:12 +00:00
|
|
|
/*
|
|
|
|
* On success devfs_populate_vp() returns with dmp->dm_lock held.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
devfs_populate_vp(struct vnode *vp)
|
|
|
|
{
|
2010-09-15 14:23:55 +00:00
|
|
|
struct devfs_dirent *de;
|
2010-08-22 16:08:12 +00:00
|
|
|
struct devfs_mount *dmp;
|
|
|
|
int locked;
|
|
|
|
|
|
|
|
ASSERT_VOP_LOCKED(vp, "devfs_populate_vp");
|
|
|
|
|
|
|
|
dmp = VFSTODEVFS(vp->v_mount);
|
|
|
|
locked = VOP_ISLOCKED(vp);
|
|
|
|
|
|
|
|
sx_xlock(&dmp->dm_lock);
|
|
|
|
DEVFS_DMP_HOLD(dmp);
|
|
|
|
|
|
|
|
/* Can't call devfs_populate() with the vnode lock held. */
|
|
|
|
VOP_UNLOCK(vp, 0);
|
|
|
|
devfs_populate(dmp);
|
|
|
|
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
vn_lock(vp, locked | LK_RETRY);
|
|
|
|
sx_xlock(&dmp->dm_lock);
|
|
|
|
if (DEVFS_DMP_DROP(dmp)) {
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
devfs_unmount_final(dmp);
|
|
|
|
return (EBADF);
|
|
|
|
}
|
2010-09-15 14:23:55 +00:00
|
|
|
if ((vp->v_iflag & VI_DOOMED) != 0) {
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
return (EBADF);
|
|
|
|
}
|
|
|
|
de = vp->v_data;
|
|
|
|
KASSERT(de != NULL,
|
|
|
|
("devfs_populate_vp: vp->v_data == NULL but vnode not doomed"));
|
|
|
|
if ((de->de_flags & DE_DOOMED) != 0) {
|
2010-08-22 16:08:12 +00:00
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
return (EBADF);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2008-12-12 01:00:38 +00:00
|
|
|
static int
|
|
|
|
devfs_vptocnp(struct vop_vptocnp_args *ap)
|
|
|
|
{
|
|
|
|
struct vnode *vp = ap->a_vp;
|
|
|
|
struct vnode **dvp = ap->a_vpp;
|
|
|
|
struct devfs_mount *dmp;
|
|
|
|
char *buf = ap->a_buf;
|
|
|
|
int *buflen = ap->a_buflen;
|
|
|
|
struct devfs_dirent *dd, *de;
|
|
|
|
int i, error;
|
|
|
|
|
|
|
|
dmp = VFSTODEVFS(vp->v_mount);
|
2010-09-15 14:23:55 +00:00
|
|
|
|
|
|
|
error = devfs_populate_vp(vp);
|
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
|
|
|
|
2008-12-12 01:00:38 +00:00
|
|
|
i = *buflen;
|
|
|
|
dd = vp->v_data;
|
|
|
|
|
|
|
|
if (vp->v_type == VCHR) {
|
|
|
|
i -= strlen(dd->de_cdp->cdp_c.si_name);
|
|
|
|
if (i < 0) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto finished;
|
|
|
|
}
|
|
|
|
bcopy(dd->de_cdp->cdp_c.si_name, buf + i,
|
|
|
|
strlen(dd->de_cdp->cdp_c.si_name));
|
|
|
|
de = dd->de_dir;
|
|
|
|
} else if (vp->v_type == VDIR) {
|
|
|
|
if (dd == dmp->dm_rootdir) {
|
|
|
|
*dvp = vp;
|
2011-11-19 07:50:49 +00:00
|
|
|
vref(*dvp);
|
2008-12-12 01:00:38 +00:00
|
|
|
goto finished;
|
|
|
|
}
|
|
|
|
i -= dd->de_dirent->d_namlen;
|
|
|
|
if (i < 0) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto finished;
|
|
|
|
}
|
|
|
|
bcopy(dd->de_dirent->d_name, buf + i,
|
|
|
|
dd->de_dirent->d_namlen);
|
|
|
|
de = dd;
|
|
|
|
} else {
|
|
|
|
error = ENOENT;
|
|
|
|
goto finished;
|
|
|
|
}
|
|
|
|
*buflen = i;
|
2010-06-09 15:29:12 +00:00
|
|
|
de = devfs_parent_dirent(de);
|
|
|
|
if (de == NULL) {
|
|
|
|
error = ENOENT;
|
|
|
|
goto finished;
|
|
|
|
}
|
2008-12-12 01:00:38 +00:00
|
|
|
mtx_lock(&devfs_de_interlock);
|
|
|
|
*dvp = de->de_vnode;
|
|
|
|
if (*dvp != NULL) {
|
|
|
|
VI_LOCK(*dvp);
|
|
|
|
mtx_unlock(&devfs_de_interlock);
|
|
|
|
vholdl(*dvp);
|
|
|
|
VI_UNLOCK(*dvp);
|
2011-11-19 07:50:49 +00:00
|
|
|
vref(*dvp);
|
|
|
|
vdrop(*dvp);
|
2008-12-12 11:10:10 +00:00
|
|
|
} else {
|
|
|
|
mtx_unlock(&devfs_de_interlock);
|
2008-12-12 01:00:38 +00:00
|
|
|
error = ENOENT;
|
2008-12-12 11:10:10 +00:00
|
|
|
}
|
2008-12-12 01:00:38 +00:00
|
|
|
finished:
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2001-05-23 17:48:20 +00:00
|
|
|
/*
|
2010-09-21 16:49:02 +00:00
|
|
|
* Construct the fully qualified path name relative to the mountpoint.
|
|
|
|
* If a NULL cnp is provided, no '/' is appended to the resulting path.
|
2003-03-02 15:56:49 +00:00
|
|
|
*/
|
2010-09-21 16:49:02 +00:00
|
|
|
char *
|
|
|
|
devfs_fqpn(char *buf, struct devfs_mount *dmp, struct devfs_dirent *dd,
|
|
|
|
struct componentname *cnp)
|
2001-05-23 17:48:20 +00:00
|
|
|
{
|
|
|
|
int i;
|
2010-09-21 16:49:02 +00:00
|
|
|
struct devfs_dirent *de;
|
|
|
|
|
|
|
|
sx_assert(&dmp->dm_lock, SA_LOCKED);
|
2001-05-23 17:48:20 +00:00
|
|
|
|
|
|
|
i = SPECNAMELEN;
|
|
|
|
buf[i] = '\0';
|
2010-09-21 16:49:02 +00:00
|
|
|
if (cnp != NULL)
|
|
|
|
i -= cnp->cn_namelen;
|
2001-05-23 17:48:20 +00:00
|
|
|
if (i < 0)
|
|
|
|
return (NULL);
|
2010-09-21 16:49:02 +00:00
|
|
|
if (cnp != NULL)
|
|
|
|
bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
|
2001-05-23 17:48:20 +00:00
|
|
|
de = dd;
|
2005-08-15 19:40:53 +00:00
|
|
|
while (de != dmp->dm_rootdir) {
|
2010-09-21 16:49:02 +00:00
|
|
|
if (cnp != NULL || i < SPECNAMELEN) {
|
|
|
|
i--;
|
|
|
|
if (i < 0)
|
|
|
|
return (NULL);
|
|
|
|
buf[i] = '/';
|
|
|
|
}
|
2001-05-23 17:48:20 +00:00
|
|
|
i -= de->de_dirent->d_namlen;
|
|
|
|
if (i < 0)
|
|
|
|
return (NULL);
|
|
|
|
bcopy(de->de_dirent->d_name, buf + i,
|
|
|
|
de->de_dirent->d_namlen);
|
2010-06-09 15:29:12 +00:00
|
|
|
de = devfs_parent_dirent(de);
|
|
|
|
if (de == NULL)
|
|
|
|
return (NULL);
|
2001-05-23 17:48:20 +00:00
|
|
|
}
|
|
|
|
return (buf + i);
|
|
|
|
}
|
|
|
|
|
2006-09-18 13:23:08 +00:00
|
|
|
static int
|
|
|
|
devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp,
|
|
|
|
struct devfs_dirent *de)
|
|
|
|
{
|
|
|
|
int not_found;
|
|
|
|
|
|
|
|
not_found = 0;
|
|
|
|
if (de->de_flags & DE_DOOMED)
|
|
|
|
not_found = 1;
|
|
|
|
if (DEVFS_DE_DROP(de)) {
|
|
|
|
KASSERT(not_found == 1, ("DEVFS de dropped but not doomed"));
|
|
|
|
devfs_dirent_free(de);
|
|
|
|
}
|
|
|
|
if (DEVFS_DMP_DROP(dmp)) {
|
|
|
|
KASSERT(not_found == 1,
|
|
|
|
("DEVFS mount struct freed before dirent"));
|
|
|
|
not_found = 2;
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
devfs_unmount_final(dmp);
|
|
|
|
}
|
2006-09-19 14:03:02 +00:00
|
|
|
if (not_found == 1 || (drop_dm_lock && not_found != 2))
|
2006-09-18 13:23:08 +00:00
|
|
|
sx_unlock(&dmp->dm_lock);
|
|
|
|
return (not_found);
|
|
|
|
}
|
|
|
|
|
2007-03-13 01:50:27 +00:00
|
|
|
static void
|
|
|
|
devfs_insmntque_dtr(struct vnode *vp, void *arg)
|
|
|
|
{
|
|
|
|
struct devfs_dirent *de;
|
|
|
|
|
|
|
|
de = (struct devfs_dirent *)arg;
|
|
|
|
mtx_lock(&devfs_de_interlock);
|
|
|
|
vp->v_data = NULL;
|
|
|
|
de->de_vnode = NULL;
|
|
|
|
mtx_unlock(&devfs_de_interlock);
|
|
|
|
vgone(vp);
|
|
|
|
vput(vp);
|
|
|
|
}
|
|
|
|
|
2006-09-18 13:23:08 +00:00
|
|
|
/*
|
|
|
|
* devfs_allocv shall be entered with dmp->dm_lock held, and it drops
|
|
|
|
* it on return.
|
|
|
|
*/
|
2000-08-27 14:46:36 +00:00
|
|
|
int
|
2010-08-06 09:23:47 +00:00
|
|
|
devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
|
|
|
|
struct vnode **vpp)
|
2000-08-20 21:34:39 +00:00
|
|
|
{
|
|
|
|
int error;
|
|
|
|
struct vnode *vp;
|
2004-06-16 09:47:26 +00:00
|
|
|
struct cdev *dev;
|
2006-09-18 13:23:08 +00:00
|
|
|
struct devfs_mount *dmp;
|
2010-08-06 09:06:55 +00:00
|
|
|
struct cdevsw *dsw;
|
2000-08-20 21:34:39 +00:00
|
|
|
|
2006-09-18 13:23:08 +00:00
|
|
|
dmp = VFSTODEVFS(mp);
|
|
|
|
if (de->de_flags & DE_DOOMED) {
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
return (ENOENT);
|
|
|
|
}
|
2011-07-13 21:07:41 +00:00
|
|
|
loop:
|
2006-09-18 13:23:08 +00:00
|
|
|
DEVFS_DE_HOLD(de);
|
|
|
|
DEVFS_DMP_HOLD(dmp);
|
2006-07-12 20:25:35 +00:00
|
|
|
mtx_lock(&devfs_de_interlock);
|
2000-08-20 21:34:39 +00:00
|
|
|
vp = de->de_vnode;
|
|
|
|
if (vp != NULL) {
|
2006-07-12 20:25:35 +00:00
|
|
|
VI_LOCK(vp);
|
|
|
|
mtx_unlock(&devfs_de_interlock);
|
2006-09-18 13:23:08 +00:00
|
|
|
sx_xunlock(&dmp->dm_lock);
|
2011-07-13 21:07:41 +00:00
|
|
|
vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread);
|
2006-09-18 13:23:08 +00:00
|
|
|
sx_xlock(&dmp->dm_lock);
|
|
|
|
if (devfs_allocv_drop_refs(0, dmp, de)) {
|
2011-07-13 21:07:41 +00:00
|
|
|
vput(vp);
|
2006-09-18 13:23:08 +00:00
|
|
|
return (ENOENT);
|
|
|
|
}
|
2011-07-13 21:07:41 +00:00
|
|
|
else if ((vp->v_iflag & VI_DOOMED) != 0) {
|
|
|
|
mtx_lock(&devfs_de_interlock);
|
|
|
|
if (de->de_vnode == vp) {
|
|
|
|
de->de_vnode = NULL;
|
|
|
|
vp->v_data = NULL;
|
|
|
|
}
|
|
|
|
mtx_unlock(&devfs_de_interlock);
|
|
|
|
vput(vp);
|
|
|
|
goto loop;
|
2008-05-23 16:36:39 +00:00
|
|
|
}
|
2006-09-18 13:23:08 +00:00
|
|
|
sx_xunlock(&dmp->dm_lock);
|
2000-08-20 21:34:39 +00:00
|
|
|
*vpp = vp;
|
|
|
|
return (0);
|
|
|
|
}
|
2006-07-12 20:25:35 +00:00
|
|
|
mtx_unlock(&devfs_de_interlock);
|
2000-09-06 11:26:43 +00:00
|
|
|
if (de->de_dirent->d_type == DT_CHR) {
|
2006-09-18 13:23:08 +00:00
|
|
|
if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) {
|
|
|
|
devfs_allocv_drop_refs(1, dmp, de);
|
2000-09-06 11:26:43 +00:00
|
|
|
return (ENOENT);
|
2006-09-18 13:23:08 +00:00
|
|
|
}
|
Revamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liaison
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
dev = &de->de_cdp->cdp_c;
|
2000-09-06 11:26:43 +00:00
|
|
|
} else {
|
2004-06-17 17:16:53 +00:00
|
|
|
dev = NULL;
|
2000-09-06 11:26:43 +00:00
|
|
|
}
|
2004-12-01 23:16:38 +00:00
|
|
|
error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
|
2000-08-20 21:34:39 +00:00
|
|
|
if (error != 0) {
|
2006-09-18 13:23:08 +00:00
|
|
|
devfs_allocv_drop_refs(1, dmp, de);
|
2000-08-20 21:34:39 +00:00
|
|
|
printf("devfs_allocv: failed to allocate new vnode\n");
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (de->de_dirent->d_type == DT_CHR) {
|
|
|
|
vp->v_type = VCHR;
|
2004-12-07 08:15:41 +00:00
|
|
|
VI_LOCK(vp);
|
|
|
|
dev_lock();
|
2005-03-31 06:51:54 +00:00
|
|
|
dev_refl(dev);
|
2007-07-26 16:58:09 +00:00
|
|
|
/* XXX: v_rdev should be protect by vnode lock */
|
2004-12-07 08:15:41 +00:00
|
|
|
vp->v_rdev = dev;
|
Revamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liaison
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
KASSERT(vp->v_usecount == 1,
|
|
|
|
("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount));
|
2004-12-07 08:15:41 +00:00
|
|
|
dev->si_usecount += vp->v_usecount;
|
2010-08-06 09:06:55 +00:00
|
|
|
/* Special casing of ttys for deadfs. Probably redundant. */
|
|
|
|
dsw = dev->si_devsw;
|
|
|
|
if (dsw != NULL && (dsw->d_flags & D_TTY) != 0)
|
|
|
|
vp->v_vflag |= VV_ISTTY;
|
2004-12-07 08:15:41 +00:00
|
|
|
dev_unlock();
|
|
|
|
VI_UNLOCK(vp);
|
2010-08-06 09:42:15 +00:00
|
|
|
if ((dev->si_flags & SI_ETERNAL) != 0)
|
|
|
|
vp->v_vflag |= VV_ETERNALDEV;
|
2004-12-01 23:16:38 +00:00
|
|
|
vp->v_op = &devfs_specops;
|
2000-08-20 21:34:39 +00:00
|
|
|
} else if (de->de_dirent->d_type == DT_DIR) {
|
|
|
|
vp->v_type = VDIR;
|
|
|
|
} else if (de->de_dirent->d_type == DT_LNK) {
|
|
|
|
vp->v_type = VLNK;
|
|
|
|
} else {
|
|
|
|
vp->v_type = VBAD;
|
|
|
|
}
|
2008-06-05 09:15:47 +00:00
|
|
|
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS);
|
2010-08-20 19:46:50 +00:00
|
|
|
VN_LOCK_ASHARE(vp);
|
2006-07-12 20:25:35 +00:00
|
|
|
mtx_lock(&devfs_de_interlock);
|
2000-08-24 15:36:55 +00:00
|
|
|
vp->v_data = de;
|
2000-08-20 21:34:39 +00:00
|
|
|
de->de_vnode = vp;
|
2006-07-12 20:25:35 +00:00
|
|
|
mtx_unlock(&devfs_de_interlock);
|
2007-03-13 01:50:27 +00:00
|
|
|
error = insmntque1(vp, mp, devfs_insmntque_dtr, de);
|
|
|
|
if (error != 0) {
|
|
|
|
(void) devfs_allocv_drop_refs(1, dmp, de);
|
|
|
|
return (error);
|
|
|
|
}
|
2006-09-18 13:23:08 +00:00
|
|
|
if (devfs_allocv_drop_refs(0, dmp, de)) {
|
|
|
|
vput(vp);
|
|
|
|
return (ENOENT);
|
|
|
|
}
|
2002-07-31 15:45:16 +00:00
|
|
|
#ifdef MAC
|
2007-10-24 19:04:04 +00:00
|
|
|
mac_devfs_vnode_associate(mp, de, vp);
|
2002-07-31 15:45:16 +00:00
|
|
|
#endif
|
2006-09-18 13:23:08 +00:00
|
|
|
sx_xunlock(&dmp->dm_lock);
|
2000-08-20 21:34:39 +00:00
|
|
|
*vpp = vp;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_access(struct vop_access_args *ap)
|
2000-08-20 21:34:39 +00:00
|
|
|
{
|
|
|
|
struct vnode *vp = ap->a_vp;
|
2000-08-21 14:45:19 +00:00
|
|
|
struct devfs_dirent *de;
|
2014-11-03 03:12:15 +00:00
|
|
|
struct proc *p;
|
Even if the permissions deny it, a process should be allowed to
access its controlling terminal.
In essense, history dictates that any process is allowed to open
/dev/tty for RW, irrespective of credential, because by definition
it is it's own controlling terminal.
Before DEVFS we relied on a hacky half-device thing (kern/tty_tty.c)
which did the magic deep down at device level, which at best was
disgusting from an architectural point of view.
My first shot at this was to use the cloning mechanism to simply
give people the right tty when they ask for /dev/tty, that's why
you get this, slightly counter intuitive result:
syv# ls -l /dev/tty `tty`
crw--w---- 1 u1 tty 5, 0 Jan 13 22:14 /dev/tty
crw--w---- 1 u1 tty 5, 0 Jan 13 22:14 /dev/ttyp0
Trouble is, when user u1 su(1)'s to user u2, he cannot open
/dev/ttyp0 anymore because he doesn't have permission to do so.
The above fix allows him to do that.
The interesting side effect is that one was previously only able
to access the controlling tty by indirection:
date > /dev/tty
but not by name:
date > `tty`
This is now possible, and that feels a lot more like DTRT.
PR: 46635
MFC candidate: could be.
2003-01-13 22:20:36 +00:00
|
|
|
int error;
|
2000-08-20 21:34:39 +00:00
|
|
|
|
2000-08-24 15:36:55 +00:00
|
|
|
de = vp->v_data;
|
|
|
|
if (vp->v_type == VDIR)
|
|
|
|
de = de->de_dir;
|
2000-08-20 21:34:39 +00:00
|
|
|
|
Even if the permissions deny it, a process should be allowed to
access its controlling terminal.
In essense, history dictates that any process is allowed to open
/dev/tty for RW, irrespective of credential, because by definition
it is it's own controlling terminal.
Before DEVFS we relied on a hacky half-device thing (kern/tty_tty.c)
which did the magic deep down at device level, which at best was
disgusting from an architectural point of view.
My first shot at this was to use the cloning mechanism to simply
give people the right tty when they ask for /dev/tty, that's why
you get this, slightly counter intuitive result:
syv# ls -l /dev/tty `tty`
crw--w---- 1 u1 tty 5, 0 Jan 13 22:14 /dev/tty
crw--w---- 1 u1 tty 5, 0 Jan 13 22:14 /dev/ttyp0
Trouble is, when user u1 su(1)'s to user u2, he cannot open
/dev/ttyp0 anymore because he doesn't have permission to do so.
The above fix allows him to do that.
The interesting side effect is that one was previously only able
to access the controlling tty by indirection:
date > /dev/tty
but not by name:
date > `tty`
This is now possible, and that feels a lot more like DTRT.
PR: 46635
MFC candidate: could be.
2003-01-13 22:20:36 +00:00
|
|
|
error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
|
2008-10-28 13:44:11 +00:00
|
|
|
ap->a_accmode, ap->a_cred, NULL);
|
Let access overriding to TTYs depend on the cdev_priv, not the vnode.
Basically this commit changes two things, which improves access to TTYs
in exceptional conditions. Basically the problem was that when you ran
jexec(8) to attach to a jail, you couldn't use /dev/tty (well, also the
node of the actual TTY, e.g. /dev/pts/X). This is very inconvenient if
you want to attach to screens quickly, use ssh(1), etc.
The fixes:
- Cache the cdev_priv of the controlling TTY in struct session. Change
devfs_access() to compare against the cdev_priv instead of the vnode.
This allows you to bypass UNIX permissions, even across different
mounts of devfs.
- Extend devfs_prison_check() to unconditionally expose the device node
of the controlling TTY, even if normal prison nesting rules normally
don't allow this. This actually allows you to interact with this
device node.
To be honest, I'm not really happy with this solution. We now have to
store three pointers to a controlling TTY (s_ttyp, s_ttyvp, s_ttydp).
In an ideal world, we should just get rid of the latter two and only use
s_ttyp, but this makes certian pieces of code very impractical (e.g.
devfs, kern_exit.c).
Reported by: Many people
2009-12-19 18:42:12 +00:00
|
|
|
if (error == 0)
|
|
|
|
return (0);
|
Even if the permissions deny it, a process should be allowed to
access its controlling terminal.
In essense, history dictates that any process is allowed to open
/dev/tty for RW, irrespective of credential, because by definition
it is it's own controlling terminal.
Before DEVFS we relied on a hacky half-device thing (kern/tty_tty.c)
which did the magic deep down at device level, which at best was
disgusting from an architectural point of view.
My first shot at this was to use the cloning mechanism to simply
give people the right tty when they ask for /dev/tty, that's why
you get this, slightly counter intuitive result:
syv# ls -l /dev/tty `tty`
crw--w---- 1 u1 tty 5, 0 Jan 13 22:14 /dev/tty
crw--w---- 1 u1 tty 5, 0 Jan 13 22:14 /dev/ttyp0
Trouble is, when user u1 su(1)'s to user u2, he cannot open
/dev/ttyp0 anymore because he doesn't have permission to do so.
The above fix allows him to do that.
The interesting side effect is that one was previously only able
to access the controlling tty by indirection:
date > /dev/tty
but not by name:
date > `tty`
This is now possible, and that feels a lot more like DTRT.
PR: 46635
MFC candidate: could be.
2003-01-13 22:20:36 +00:00
|
|
|
if (error != EACCES)
|
|
|
|
return (error);
|
2014-11-03 03:12:15 +00:00
|
|
|
p = ap->a_td->td_proc;
|
Even if the permissions deny it, a process should be allowed to
access its controlling terminal.
In essense, history dictates that any process is allowed to open
/dev/tty for RW, irrespective of credential, because by definition
it is it's own controlling terminal.
Before DEVFS we relied on a hacky half-device thing (kern/tty_tty.c)
which did the magic deep down at device level, which at best was
disgusting from an architectural point of view.
My first shot at this was to use the cloning mechanism to simply
give people the right tty when they ask for /dev/tty, that's why
you get this, slightly counter intuitive result:
syv# ls -l /dev/tty `tty`
crw--w---- 1 u1 tty 5, 0 Jan 13 22:14 /dev/tty
crw--w---- 1 u1 tty 5, 0 Jan 13 22:14 /dev/ttyp0
Trouble is, when user u1 su(1)'s to user u2, he cannot open
/dev/ttyp0 anymore because he doesn't have permission to do so.
The above fix allows him to do that.
The interesting side effect is that one was previously only able
to access the controlling tty by indirection:
date > /dev/tty
but not by name:
date > `tty`
This is now possible, and that feels a lot more like DTRT.
PR: 46635
MFC candidate: could be.
2003-01-13 22:20:36 +00:00
|
|
|
/* We do, however, allow access to the controlling terminal */
|
2014-11-03 03:12:15 +00:00
|
|
|
PROC_LOCK(p);
|
|
|
|
if (!(p->p_flag & P_CONTROLT)) {
|
|
|
|
PROC_UNLOCK(p);
|
Even if the permissions deny it, a process should be allowed to
access its controlling terminal.
In essense, history dictates that any process is allowed to open
/dev/tty for RW, irrespective of credential, because by definition
it is it's own controlling terminal.
Before DEVFS we relied on a hacky half-device thing (kern/tty_tty.c)
which did the magic deep down at device level, which at best was
disgusting from an architectural point of view.
My first shot at this was to use the cloning mechanism to simply
give people the right tty when they ask for /dev/tty, that's why
you get this, slightly counter intuitive result:
syv# ls -l /dev/tty `tty`
crw--w---- 1 u1 tty 5, 0 Jan 13 22:14 /dev/tty
crw--w---- 1 u1 tty 5, 0 Jan 13 22:14 /dev/ttyp0
Trouble is, when user u1 su(1)'s to user u2, he cannot open
/dev/ttyp0 anymore because he doesn't have permission to do so.
The above fix allows him to do that.
The interesting side effect is that one was previously only able
to access the controlling tty by indirection:
date > /dev/tty
but not by name:
date > `tty`
This is now possible, and that feels a lot more like DTRT.
PR: 46635
MFC candidate: could be.
2003-01-13 22:20:36 +00:00
|
|
|
return (error);
|
2014-11-03 03:12:15 +00:00
|
|
|
}
|
|
|
|
if (p->p_session->s_ttydp == de->de_cdp)
|
|
|
|
error = 0;
|
|
|
|
PROC_UNLOCK(p);
|
Even if the permissions deny it, a process should be allowed to
access its controlling terminal.
In essense, history dictates that any process is allowed to open
/dev/tty for RW, irrespective of credential, because by definition
it is it's own controlling terminal.
Before DEVFS we relied on a hacky half-device thing (kern/tty_tty.c)
which did the magic deep down at device level, which at best was
disgusting from an architectural point of view.
My first shot at this was to use the cloning mechanism to simply
give people the right tty when they ask for /dev/tty, that's why
you get this, slightly counter intuitive result:
syv# ls -l /dev/tty `tty`
crw--w---- 1 u1 tty 5, 0 Jan 13 22:14 /dev/tty
crw--w---- 1 u1 tty 5, 0 Jan 13 22:14 /dev/ttyp0
Trouble is, when user u1 su(1)'s to user u2, he cannot open
/dev/ttyp0 anymore because he doesn't have permission to do so.
The above fix allows him to do that.
The interesting side effect is that one was previously only able
to access the controlling tty by indirection:
date > /dev/tty
but not by name:
date > `tty`
This is now possible, and that feels a lot more like DTRT.
PR: 46635
MFC candidate: could be.
2003-01-13 22:20:36 +00:00
|
|
|
return (error);
|
2000-08-20 21:34:39 +00:00
|
|
|
}
|
|
|
|
|
2004-10-22 09:59:37 +00:00
|
|
|
/* ARGSUSED */
|
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_close(struct vop_close_args *ap)
|
2004-10-22 09:59:37 +00:00
|
|
|
{
|
|
|
|
struct vnode *vp = ap->a_vp, *oldvp;
|
|
|
|
struct thread *td = ap->a_td;
|
2014-11-03 03:12:15 +00:00
|
|
|
struct proc *p;
|
2004-10-22 09:59:37 +00:00
|
|
|
struct cdev *dev = vp->v_rdev;
|
|
|
|
struct cdevsw *dsw;
|
2010-08-06 09:42:15 +00:00
|
|
|
int vp_locked, error, ref;
|
2004-10-22 09:59:37 +00:00
|
|
|
|
2010-06-01 18:57:21 +00:00
|
|
|
/*
|
|
|
|
* XXX: Don't call d_close() if we were called because of
|
|
|
|
* XXX: insmntque1() failure.
|
|
|
|
*/
|
|
|
|
if (vp->v_data == NULL)
|
|
|
|
return (0);
|
|
|
|
|
2004-10-22 09:59:37 +00:00
|
|
|
/*
|
|
|
|
* Hack: a tty device that is a controlling terminal
|
|
|
|
* has a reference from the session structure.
|
|
|
|
* We cannot easily tell that a character device is
|
|
|
|
* a controlling terminal, unless it is the closing
|
|
|
|
* process' controlling terminal. In that case,
|
|
|
|
* if the reference count is 2 (this last descriptor
|
|
|
|
* plus the session), release the reference from the session.
|
|
|
|
*/
|
2014-11-03 03:12:15 +00:00
|
|
|
if (td != NULL) {
|
|
|
|
p = td->td_proc;
|
|
|
|
PROC_LOCK(p);
|
|
|
|
if (vp == p->p_session->s_ttyvp) {
|
|
|
|
PROC_UNLOCK(p);
|
|
|
|
oldvp = NULL;
|
|
|
|
sx_xlock(&proctree_lock);
|
|
|
|
if (vp == p->p_session->s_ttyvp) {
|
|
|
|
SESS_LOCK(p->p_session);
|
|
|
|
VI_LOCK(vp);
|
|
|
|
if (count_dev(dev) == 2 &&
|
|
|
|
(vp->v_iflag & VI_DOOMED) == 0) {
|
|
|
|
p->p_session->s_ttyvp = NULL;
|
|
|
|
p->p_session->s_ttydp = NULL;
|
|
|
|
oldvp = vp;
|
|
|
|
}
|
|
|
|
VI_UNLOCK(vp);
|
|
|
|
SESS_UNLOCK(p->p_session);
|
2014-10-06 06:20:35 +00:00
|
|
|
}
|
2014-11-03 03:12:15 +00:00
|
|
|
sx_xunlock(&proctree_lock);
|
|
|
|
if (oldvp != NULL)
|
|
|
|
vrele(oldvp);
|
|
|
|
} else
|
|
|
|
PROC_UNLOCK(p);
|
2004-10-22 09:59:37 +00:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* We do not want to really close the device if it
|
|
|
|
* is still in use unless we are trying to close it
|
|
|
|
* forcibly. Since every use (buffer, vnode, swap, cmap)
|
|
|
|
* holds a reference to the vnode, and because we mark
|
|
|
|
* any other vnodes that alias this device, when the
|
|
|
|
* sum of the reference counts on all the aliased
|
|
|
|
* vnodes descends to one, we are on last close.
|
|
|
|
*/
|
2010-08-06 09:42:15 +00:00
|
|
|
dsw = dev_refthread(dev, &ref);
|
2004-10-22 09:59:37 +00:00
|
|
|
if (dsw == NULL)
|
|
|
|
return (ENXIO);
|
|
|
|
VI_LOCK(vp);
|
2005-03-13 12:14:56 +00:00
|
|
|
if (vp->v_iflag & VI_DOOMED) {
|
2004-10-22 09:59:37 +00:00
|
|
|
/* Forced close. */
|
|
|
|
} else if (dsw->d_flags & D_TRACKCLOSE) {
|
|
|
|
/* Keep device updated on status. */
|
|
|
|
} else if (count_dev(dev) > 1) {
|
|
|
|
VI_UNLOCK(vp);
|
2010-08-06 09:42:15 +00:00
|
|
|
dev_relthread(dev, ref);
|
2004-10-22 09:59:37 +00:00
|
|
|
return (0);
|
|
|
|
}
|
2006-10-18 11:17:14 +00:00
|
|
|
vholdl(vp);
|
2004-10-22 09:59:37 +00:00
|
|
|
VI_UNLOCK(vp);
|
2008-02-25 18:45:57 +00:00
|
|
|
vp_locked = VOP_ISLOCKED(vp);
|
2008-01-13 14:44:15 +00:00
|
|
|
VOP_UNLOCK(vp, 0);
|
2004-10-22 09:59:37 +00:00
|
|
|
KASSERT(dev->si_refcount > 0,
|
|
|
|
("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
|
2008-05-07 19:03:57 +00:00
|
|
|
error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
|
2010-08-06 09:42:15 +00:00
|
|
|
dev_relthread(dev, ref);
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(vp, vp_locked | LK_RETRY);
|
2006-10-18 11:17:14 +00:00
|
|
|
vdrop(vp);
|
2004-10-22 09:59:37 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2004-11-08 10:46:47 +00:00
|
|
|
static int
|
|
|
|
devfs_close_f(struct file *fp, struct thread *td)
|
|
|
|
{
|
2008-05-21 09:31:44 +00:00
|
|
|
int error;
|
2008-09-26 14:50:49 +00:00
|
|
|
struct file *fpop;
|
2004-11-08 10:46:47 +00:00
|
|
|
|
2011-12-09 17:49:34 +00:00
|
|
|
/*
|
|
|
|
* NB: td may be NULL if this descriptor is closed due to
|
|
|
|
* garbage collection from a closed UNIX domain socket.
|
|
|
|
*/
|
|
|
|
fpop = curthread->td_fpop;
|
|
|
|
curthread->td_fpop = fp;
|
2008-05-21 09:31:44 +00:00
|
|
|
error = vnops.fo_close(fp, td);
|
2011-12-09 17:49:34 +00:00
|
|
|
curthread->td_fpop = fpop;
|
2011-11-04 03:39:31 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The f_cdevpriv cannot be assigned non-NULL value while we
|
|
|
|
* are destroying the file.
|
|
|
|
*/
|
|
|
|
if (fp->f_cdevpriv != NULL)
|
|
|
|
devfs_fpdrop(fp);
|
2008-05-21 09:31:44 +00:00
|
|
|
return (error);
|
2004-11-08 10:46:47 +00:00
|
|
|
}
|
|
|
|
|
2004-10-22 09:59:37 +00:00
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_fsync(struct vop_fsync_args *ap)
|
2004-10-22 09:59:37 +00:00
|
|
|
{
|
2009-01-08 19:13:34 +00:00
|
|
|
int error;
|
|
|
|
struct bufobj *bo;
|
|
|
|
struct devfs_dirent *de;
|
|
|
|
|
|
|
|
if (!vn_isdisk(ap->a_vp, &error)) {
|
|
|
|
bo = &ap->a_vp->v_bufobj;
|
|
|
|
de = ap->a_vp->v_data;
|
|
|
|
if (error == ENXIO && bo->bo_dirty.bv_cnt > 0) {
|
|
|
|
printf("Device %s went missing before all of the data "
|
|
|
|
"could be written to it; expect data loss.\n",
|
|
|
|
de->de_dirent->d_name);
|
|
|
|
|
|
|
|
error = vop_stdfsync(ap);
|
|
|
|
if (bo->bo_dirty.bv_cnt != 0 || error != 0)
|
|
|
|
panic("devfs_fsync: vop_stdfsync failed.");
|
|
|
|
}
|
|
|
|
|
2004-10-22 09:59:37 +00:00
|
|
|
return (0);
|
2009-01-08 19:13:34 +00:00
|
|
|
}
|
2004-10-22 09:59:37 +00:00
|
|
|
|
|
|
|
return (vop_stdfsync(ap));
|
|
|
|
}
|
|
|
|
|
2000-08-20 21:34:39 +00:00
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_getattr(struct vop_getattr_args *ap)
|
2000-08-20 21:34:39 +00:00
|
|
|
{
|
|
|
|
struct vnode *vp = ap->a_vp;
|
|
|
|
struct vattr *vap = ap->a_vap;
|
2010-08-25 15:29:12 +00:00
|
|
|
int error;
|
2000-08-20 21:34:39 +00:00
|
|
|
struct devfs_dirent *de;
|
2010-08-25 15:29:12 +00:00
|
|
|
struct devfs_mount *dmp;
|
2004-06-16 09:47:26 +00:00
|
|
|
struct cdev *dev;
|
2000-08-20 21:34:39 +00:00
|
|
|
|
2010-08-25 15:29:12 +00:00
|
|
|
error = devfs_populate_vp(vp);
|
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
dmp = VFSTODEVFS(vp->v_mount);
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
|
2000-08-24 15:36:55 +00:00
|
|
|
de = vp->v_data;
|
2004-12-20 21:12:11 +00:00
|
|
|
KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
|
|
|
|
if (vp->v_type == VDIR) {
|
2000-08-24 15:36:55 +00:00
|
|
|
de = de->de_dir;
|
2004-12-20 21:12:11 +00:00
|
|
|
KASSERT(de != NULL,
|
|
|
|
("Null dir dirent in devfs_getattr vp=%p", vp));
|
|
|
|
}
|
2000-08-20 21:34:39 +00:00
|
|
|
vap->va_uid = de->de_uid;
|
|
|
|
vap->va_gid = de->de_gid;
|
|
|
|
vap->va_mode = de->de_mode;
|
2003-03-02 15:56:49 +00:00
|
|
|
if (vp->v_type == VLNK)
|
2004-02-19 19:09:52 +00:00
|
|
|
vap->va_size = strlen(de->de_symlink);
|
2001-11-25 21:00:38 +00:00
|
|
|
else if (vp->v_type == VDIR)
|
|
|
|
vap->va_size = vap->va_bytes = DEV_BSIZE;
|
2001-01-30 08:39:52 +00:00
|
|
|
else
|
|
|
|
vap->va_size = 0;
|
2001-11-25 21:00:38 +00:00
|
|
|
if (vp->v_type != VDIR)
|
|
|
|
vap->va_bytes = 0;
|
2000-08-20 21:34:39 +00:00
|
|
|
vap->va_blocksize = DEV_BSIZE;
|
2000-09-06 11:26:43 +00:00
|
|
|
vap->va_type = vp->v_type;
|
2001-02-02 22:54:41 +00:00
|
|
|
|
|
|
|
#define fix(aa) \
|
|
|
|
do { \
|
2007-04-20 01:47:05 +00:00
|
|
|
if ((aa).tv_sec <= 3600) { \
|
2001-02-02 22:54:41 +00:00
|
|
|
(aa).tv_sec = boottime.tv_sec; \
|
|
|
|
(aa).tv_nsec = boottime.tv_usec * 1000; \
|
|
|
|
} \
|
|
|
|
} while (0)
|
|
|
|
|
2000-08-24 15:36:55 +00:00
|
|
|
if (vp->v_type != VCHR) {
|
2001-02-02 22:54:41 +00:00
|
|
|
fix(de->de_atime);
|
2000-08-24 15:36:55 +00:00
|
|
|
vap->va_atime = de->de_atime;
|
2001-02-02 22:54:41 +00:00
|
|
|
fix(de->de_mtime);
|
2000-08-24 15:36:55 +00:00
|
|
|
vap->va_mtime = de->de_mtime;
|
2001-02-02 22:54:41 +00:00
|
|
|
fix(de->de_ctime);
|
2000-08-24 15:36:55 +00:00
|
|
|
vap->va_ctime = de->de_ctime;
|
|
|
|
} else {
|
|
|
|
dev = vp->v_rdev;
|
2001-02-02 22:54:41 +00:00
|
|
|
fix(dev->si_atime);
|
2000-08-24 15:36:55 +00:00
|
|
|
vap->va_atime = dev->si_atime;
|
2001-02-02 22:54:41 +00:00
|
|
|
fix(dev->si_mtime);
|
2000-08-24 15:36:55 +00:00
|
|
|
vap->va_mtime = dev->si_mtime;
|
2001-02-02 22:54:41 +00:00
|
|
|
fix(dev->si_ctime);
|
2000-08-24 15:36:55 +00:00
|
|
|
vap->va_ctime = dev->si_ctime;
|
2005-03-10 18:21:34 +00:00
|
|
|
|
2008-06-16 17:34:59 +00:00
|
|
|
vap->va_rdev = cdev2priv(dev)->cdp_inode;
|
2000-08-24 15:36:55 +00:00
|
|
|
}
|
2000-08-20 21:34:39 +00:00
|
|
|
vap->va_gen = 0;
|
|
|
|
vap->va_flags = 0;
|
2008-09-20 19:50:52 +00:00
|
|
|
vap->va_filerev = 0;
|
2000-08-24 15:36:55 +00:00
|
|
|
vap->va_nlink = de->de_links;
|
2000-08-20 21:34:39 +00:00
|
|
|
vap->va_fileid = de->de_inode;
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2004-10-22 09:59:37 +00:00
|
|
|
/* ARGSUSED */
|
2004-11-08 10:46:47 +00:00
|
|
|
static int
|
|
|
|
devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
|
|
|
|
{
|
|
|
|
struct cdev *dev;
|
|
|
|
struct cdevsw *dsw;
|
2004-11-13 23:21:54 +00:00
|
|
|
struct vnode *vp;
|
2004-11-08 10:46:47 +00:00
|
|
|
struct vnode *vpold;
|
2010-08-06 09:42:15 +00:00
|
|
|
int error, i, ref;
|
2005-03-10 18:49:17 +00:00
|
|
|
const char *p;
|
|
|
|
struct fiodgname_arg *fgn;
|
2008-09-26 14:50:49 +00:00
|
|
|
struct file *fpop;
|
2004-11-08 10:46:47 +00:00
|
|
|
|
2008-09-26 14:50:49 +00:00
|
|
|
fpop = td->td_fpop;
|
2010-08-06 09:42:15 +00:00
|
|
|
error = devfs_fp_check(fp, &dev, &dsw, &ref);
|
2014-10-15 13:16:51 +00:00
|
|
|
if (error != 0) {
|
|
|
|
error = vnops.fo_ioctl(fp, com, data, cred, td);
|
2004-11-13 23:21:54 +00:00
|
|
|
return (error);
|
2014-10-15 13:16:51 +00:00
|
|
|
}
|
2004-11-08 10:46:47 +00:00
|
|
|
|
|
|
|
if (com == FIODTYPE) {
|
|
|
|
*(int *)data = dsw->d_flags & D_TYPEMASK;
|
2008-09-26 14:50:49 +00:00
|
|
|
td->td_fpop = fpop;
|
2010-08-06 09:42:15 +00:00
|
|
|
dev_relthread(dev, ref);
|
2004-11-08 10:46:47 +00:00
|
|
|
return (0);
|
2005-03-10 18:49:17 +00:00
|
|
|
} else if (com == FIODGNAME) {
|
|
|
|
fgn = data;
|
|
|
|
p = devtoname(dev);
|
|
|
|
i = strlen(p) + 1;
|
|
|
|
if (i > fgn->len)
|
2005-08-29 11:14:18 +00:00
|
|
|
error = EINVAL;
|
|
|
|
else
|
|
|
|
error = copyout(p, fgn->buf, i);
|
2008-09-26 14:50:49 +00:00
|
|
|
td->td_fpop = fpop;
|
2010-08-06 09:42:15 +00:00
|
|
|
dev_relthread(dev, ref);
|
2005-08-29 11:14:18 +00:00
|
|
|
return (error);
|
2004-11-08 10:46:47 +00:00
|
|
|
}
|
|
|
|
error = dsw->d_ioctl(dev, com, data, fp->f_flag, td);
|
2008-05-21 09:31:44 +00:00
|
|
|
td->td_fpop = NULL;
|
2010-08-06 09:42:15 +00:00
|
|
|
dev_relthread(dev, ref);
|
2004-11-08 10:46:47 +00:00
|
|
|
if (error == ENOIOCTL)
|
|
|
|
error = ENOTTY;
|
|
|
|
if (error == 0 && com == TIOCSCTTY) {
|
2004-11-13 23:21:54 +00:00
|
|
|
vp = fp->f_vnode;
|
2004-11-08 10:46:47 +00:00
|
|
|
|
|
|
|
/* Do nothing if reassigning same control tty */
|
|
|
|
sx_slock(&proctree_lock);
|
|
|
|
if (td->td_proc->p_session->s_ttyvp == vp) {
|
|
|
|
sx_sunlock(&proctree_lock);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
vpold = td->td_proc->p_session->s_ttyvp;
|
|
|
|
VREF(vp);
|
|
|
|
SESS_LOCK(td->td_proc->p_session);
|
|
|
|
td->td_proc->p_session->s_ttyvp = vp;
|
Let access overriding to TTYs depend on the cdev_priv, not the vnode.
Basically this commit changes two things, which improves access to TTYs
in exceptional conditions. Basically the problem was that when you ran
jexec(8) to attach to a jail, you couldn't use /dev/tty (well, also the
node of the actual TTY, e.g. /dev/pts/X). This is very inconvenient if
you want to attach to screens quickly, use ssh(1), etc.
The fixes:
- Cache the cdev_priv of the controlling TTY in struct session. Change
devfs_access() to compare against the cdev_priv instead of the vnode.
This allows you to bypass UNIX permissions, even across different
mounts of devfs.
- Extend devfs_prison_check() to unconditionally expose the device node
of the controlling TTY, even if normal prison nesting rules normally
don't allow this. This actually allows you to interact with this
device node.
To be honest, I'm not really happy with this solution. We now have to
store three pointers to a controlling TTY (s_ttyp, s_ttyvp, s_ttydp).
In an ideal world, we should just get rid of the latter two and only use
s_ttyp, but this makes certian pieces of code very impractical (e.g.
devfs, kern_exit.c).
Reported by: Many people
2009-12-19 18:42:12 +00:00
|
|
|
td->td_proc->p_session->s_ttydp = cdev2priv(dev);
|
2004-11-08 10:46:47 +00:00
|
|
|
SESS_UNLOCK(td->td_proc->p_session);
|
|
|
|
|
|
|
|
sx_sunlock(&proctree_lock);
|
|
|
|
|
|
|
|
/* Get rid of reference to old control tty */
|
|
|
|
if (vpold)
|
|
|
|
vrele(vpold);
|
|
|
|
}
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2004-10-22 09:59:37 +00:00
|
|
|
/* ARGSUSED */
|
2004-11-08 10:46:47 +00:00
|
|
|
static int
|
|
|
|
devfs_kqfilter_f(struct file *fp, struct knote *kn)
|
|
|
|
{
|
|
|
|
struct cdev *dev;
|
|
|
|
struct cdevsw *dsw;
|
2010-08-06 09:42:15 +00:00
|
|
|
int error, ref;
|
2008-09-26 14:50:49 +00:00
|
|
|
struct file *fpop;
|
|
|
|
struct thread *td;
|
2004-11-08 10:46:47 +00:00
|
|
|
|
2008-09-26 14:50:49 +00:00
|
|
|
td = curthread;
|
|
|
|
fpop = td->td_fpop;
|
2010-08-06 09:42:15 +00:00
|
|
|
error = devfs_fp_check(fp, &dev, &dsw, &ref);
|
2004-11-13 23:21:54 +00:00
|
|
|
if (error)
|
|
|
|
return (error);
|
2004-11-08 10:46:47 +00:00
|
|
|
error = dsw->d_kqfilter(dev, kn);
|
2008-09-26 14:50:49 +00:00
|
|
|
td->td_fpop = fpop;
|
2010-08-06 09:42:15 +00:00
|
|
|
dev_relthread(dev, ref);
|
2004-11-08 10:46:47 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2009-06-20 14:50:32 +00:00
|
|
|
static inline int
|
Let access overriding to TTYs depend on the cdev_priv, not the vnode.
Basically this commit changes two things, which improves access to TTYs
in exceptional conditions. Basically the problem was that when you ran
jexec(8) to attach to a jail, you couldn't use /dev/tty (well, also the
node of the actual TTY, e.g. /dev/pts/X). This is very inconvenient if
you want to attach to screens quickly, use ssh(1), etc.
The fixes:
- Cache the cdev_priv of the controlling TTY in struct session. Change
devfs_access() to compare against the cdev_priv instead of the vnode.
This allows you to bypass UNIX permissions, even across different
mounts of devfs.
- Extend devfs_prison_check() to unconditionally expose the device node
of the controlling TTY, even if normal prison nesting rules normally
don't allow this. This actually allows you to interact with this
device node.
To be honest, I'm not really happy with this solution. We now have to
store three pointers to a controlling TTY (s_ttyp, s_ttyvp, s_ttydp).
In an ideal world, we should just get rid of the latter two and only use
s_ttyp, but this makes certian pieces of code very impractical (e.g.
devfs, kern_exit.c).
Reported by: Many people
2009-12-19 18:42:12 +00:00
|
|
|
devfs_prison_check(struct devfs_dirent *de, struct thread *td)
|
2009-06-20 14:50:32 +00:00
|
|
|
{
|
|
|
|
struct cdev_priv *cdp;
|
|
|
|
struct ucred *dcr;
|
2014-11-03 03:12:15 +00:00
|
|
|
struct proc *p;
|
Let access overriding to TTYs depend on the cdev_priv, not the vnode.
Basically this commit changes two things, which improves access to TTYs
in exceptional conditions. Basically the problem was that when you ran
jexec(8) to attach to a jail, you couldn't use /dev/tty (well, also the
node of the actual TTY, e.g. /dev/pts/X). This is very inconvenient if
you want to attach to screens quickly, use ssh(1), etc.
The fixes:
- Cache the cdev_priv of the controlling TTY in struct session. Change
devfs_access() to compare against the cdev_priv instead of the vnode.
This allows you to bypass UNIX permissions, even across different
mounts of devfs.
- Extend devfs_prison_check() to unconditionally expose the device node
of the controlling TTY, even if normal prison nesting rules normally
don't allow this. This actually allows you to interact with this
device node.
To be honest, I'm not really happy with this solution. We now have to
store three pointers to a controlling TTY (s_ttyp, s_ttyvp, s_ttydp).
In an ideal world, we should just get rid of the latter two and only use
s_ttyp, but this makes certian pieces of code very impractical (e.g.
devfs, kern_exit.c).
Reported by: Many people
2009-12-19 18:42:12 +00:00
|
|
|
int error;
|
2009-06-20 14:50:32 +00:00
|
|
|
|
|
|
|
cdp = de->de_cdp;
|
|
|
|
if (cdp == NULL)
|
|
|
|
return (0);
|
|
|
|
dcr = cdp->cdp_c.si_cred;
|
|
|
|
if (dcr == NULL)
|
|
|
|
return (0);
|
|
|
|
|
Let access overriding to TTYs depend on the cdev_priv, not the vnode.
Basically this commit changes two things, which improves access to TTYs
in exceptional conditions. Basically the problem was that when you ran
jexec(8) to attach to a jail, you couldn't use /dev/tty (well, also the
node of the actual TTY, e.g. /dev/pts/X). This is very inconvenient if
you want to attach to screens quickly, use ssh(1), etc.
The fixes:
- Cache the cdev_priv of the controlling TTY in struct session. Change
devfs_access() to compare against the cdev_priv instead of the vnode.
This allows you to bypass UNIX permissions, even across different
mounts of devfs.
- Extend devfs_prison_check() to unconditionally expose the device node
of the controlling TTY, even if normal prison nesting rules normally
don't allow this. This actually allows you to interact with this
device node.
To be honest, I'm not really happy with this solution. We now have to
store three pointers to a controlling TTY (s_ttyp, s_ttyvp, s_ttydp).
In an ideal world, we should just get rid of the latter two and only use
s_ttyp, but this makes certian pieces of code very impractical (e.g.
devfs, kern_exit.c).
Reported by: Many people
2009-12-19 18:42:12 +00:00
|
|
|
error = prison_check(td->td_ucred, dcr);
|
|
|
|
if (error == 0)
|
|
|
|
return (0);
|
|
|
|
/* We do, however, allow access to the controlling terminal */
|
2014-11-03 03:12:15 +00:00
|
|
|
p = td->td_proc;
|
|
|
|
PROC_LOCK(p);
|
|
|
|
if (!(p->p_flag & P_CONTROLT)) {
|
|
|
|
PROC_UNLOCK(p);
|
Let access overriding to TTYs depend on the cdev_priv, not the vnode.
Basically this commit changes two things, which improves access to TTYs
in exceptional conditions. Basically the problem was that when you ran
jexec(8) to attach to a jail, you couldn't use /dev/tty (well, also the
node of the actual TTY, e.g. /dev/pts/X). This is very inconvenient if
you want to attach to screens quickly, use ssh(1), etc.
The fixes:
- Cache the cdev_priv of the controlling TTY in struct session. Change
devfs_access() to compare against the cdev_priv instead of the vnode.
This allows you to bypass UNIX permissions, even across different
mounts of devfs.
- Extend devfs_prison_check() to unconditionally expose the device node
of the controlling TTY, even if normal prison nesting rules normally
don't allow this. This actually allows you to interact with this
device node.
To be honest, I'm not really happy with this solution. We now have to
store three pointers to a controlling TTY (s_ttyp, s_ttyvp, s_ttydp).
In an ideal world, we should just get rid of the latter two and only use
s_ttyp, but this makes certian pieces of code very impractical (e.g.
devfs, kern_exit.c).
Reported by: Many people
2009-12-19 18:42:12 +00:00
|
|
|
return (error);
|
2014-11-03 03:12:15 +00:00
|
|
|
}
|
|
|
|
if (p->p_session->s_ttydp == cdp)
|
|
|
|
error = 0;
|
|
|
|
PROC_UNLOCK(p);
|
Let access overriding to TTYs depend on the cdev_priv, not the vnode.
Basically this commit changes two things, which improves access to TTYs
in exceptional conditions. Basically the problem was that when you ran
jexec(8) to attach to a jail, you couldn't use /dev/tty (well, also the
node of the actual TTY, e.g. /dev/pts/X). This is very inconvenient if
you want to attach to screens quickly, use ssh(1), etc.
The fixes:
- Cache the cdev_priv of the controlling TTY in struct session. Change
devfs_access() to compare against the cdev_priv instead of the vnode.
This allows you to bypass UNIX permissions, even across different
mounts of devfs.
- Extend devfs_prison_check() to unconditionally expose the device node
of the controlling TTY, even if normal prison nesting rules normally
don't allow this. This actually allows you to interact with this
device node.
To be honest, I'm not really happy with this solution. We now have to
store three pointers to a controlling TTY (s_ttyp, s_ttyvp, s_ttydp).
In an ideal world, we should just get rid of the latter two and only use
s_ttyp, but this makes certian pieces of code very impractical (e.g.
devfs, kern_exit.c).
Reported by: Many people
2009-12-19 18:42:12 +00:00
|
|
|
return (error);
|
2009-06-20 14:50:32 +00:00
|
|
|
}
|
|
|
|
|
2000-08-20 21:34:39 +00:00
|
|
|
static int
|
2006-09-18 13:23:08 +00:00
|
|
|
devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
|
2000-08-20 21:34:39 +00:00
|
|
|
{
|
2000-08-27 14:46:36 +00:00
|
|
|
struct componentname *cnp;
|
|
|
|
struct vnode *dvp, **vpp;
|
2001-09-12 08:38:13 +00:00
|
|
|
struct thread *td;
|
2000-08-27 14:46:36 +00:00
|
|
|
struct devfs_dirent *de, *dd;
|
2003-10-20 07:04:09 +00:00
|
|
|
struct devfs_dirent **dde;
|
2000-08-27 14:46:36 +00:00
|
|
|
struct devfs_mount *dmp;
|
2004-06-16 09:47:26 +00:00
|
|
|
struct cdev *cdev;
|
2010-08-06 09:23:47 +00:00
|
|
|
int error, flags, nameiop, dvplocked;
|
2000-08-27 14:46:36 +00:00
|
|
|
char specname[SPECNAMELEN + 1], *pname;
|
|
|
|
|
|
|
|
cnp = ap->a_cnp;
|
|
|
|
vpp = ap->a_vpp;
|
|
|
|
dvp = ap->a_dvp;
|
|
|
|
pname = cnp->cn_nameptr;
|
2001-09-12 08:38:13 +00:00
|
|
|
td = cnp->cn_thread;
|
2000-08-27 14:46:36 +00:00
|
|
|
flags = cnp->cn_flags;
|
|
|
|
nameiop = cnp->cn_nameiop;
|
|
|
|
dmp = VFSTODEVFS(dvp->v_mount);
|
|
|
|
dd = dvp->v_data;
|
|
|
|
*vpp = NULLVP;
|
2000-08-20 21:34:39 +00:00
|
|
|
|
2002-05-10 15:41:14 +00:00
|
|
|
if ((flags & ISLASTCN) && nameiop == RENAME)
|
2000-08-27 14:46:36 +00:00
|
|
|
return (EOPNOTSUPP);
|
|
|
|
|
|
|
|
if (dvp->v_type != VDIR)
|
|
|
|
return (ENOTDIR);
|
|
|
|
|
2002-08-04 10:29:36 +00:00
|
|
|
if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
|
2000-08-27 14:46:36 +00:00
|
|
|
return (EIO);
|
|
|
|
|
2001-09-12 08:38:13 +00:00
|
|
|
error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
|
2000-08-27 14:46:36 +00:00
|
|
|
if (error)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
if (cnp->cn_namelen == 1 && *pname == '.') {
|
2002-05-10 15:41:14 +00:00
|
|
|
if ((flags & ISLASTCN) && nameiop != LOOKUP)
|
2000-08-27 14:46:36 +00:00
|
|
|
return (EINVAL);
|
|
|
|
*vpp = dvp;
|
|
|
|
VREF(dvp);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (flags & ISDOTDOT) {
|
2002-05-10 15:41:14 +00:00
|
|
|
if ((flags & ISLASTCN) && nameiop != LOOKUP)
|
2000-08-27 14:46:36 +00:00
|
|
|
return (EINVAL);
|
2010-06-09 15:29:12 +00:00
|
|
|
de = devfs_parent_dirent(dd);
|
|
|
|
if (de == NULL)
|
|
|
|
return (ENOENT);
|
2010-08-06 09:23:47 +00:00
|
|
|
dvplocked = VOP_ISLOCKED(dvp);
|
2008-01-13 14:44:15 +00:00
|
|
|
VOP_UNLOCK(dvp, 0);
|
2010-08-06 09:23:47 +00:00
|
|
|
error = devfs_allocv(de, dvp->v_mount,
|
|
|
|
cnp->cn_lkflags & LK_TYPE_MASK, vpp);
|
2006-09-18 13:23:08 +00:00
|
|
|
*dm_unlock = 0;
|
2010-08-06 09:23:47 +00:00
|
|
|
vn_lock(dvp, dvplocked | LK_RETRY);
|
2000-08-27 14:46:36 +00:00
|
|
|
return (error);
|
|
|
|
}
|
2000-08-20 21:34:39 +00:00
|
|
|
|
2000-08-27 14:46:36 +00:00
|
|
|
dd = dvp->v_data;
|
2010-08-12 15:29:07 +00:00
|
|
|
de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen, 0);
|
Revamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liaison
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
while (de == NULL) { /* While(...) so we can use break */
|
2000-08-27 14:46:36 +00:00
|
|
|
|
Revamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liaison
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
if (nameiop == DELETE)
|
|
|
|
return (ENOENT);
|
2003-10-20 15:08:10 +00:00
|
|
|
|
Revamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liaison
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
/*
|
|
|
|
* OK, we didn't have an entry for the name we were asked for
|
|
|
|
* so we try to see if anybody can create it on demand.
|
|
|
|
*/
|
2010-09-21 16:49:02 +00:00
|
|
|
pname = devfs_fqpn(specname, dmp, dd, cnp);
|
Revamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liaison
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
if (pname == NULL)
|
|
|
|
break;
|
2003-10-20 15:08:10 +00:00
|
|
|
|
Revamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liaison
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
cdev = NULL;
|
2007-07-03 17:42:37 +00:00
|
|
|
DEVFS_DMP_HOLD(dmp);
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
sx_slock(&clone_drain_lock);
|
Revamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liaison
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
EVENTHANDLER_INVOKE(dev_clone,
|
|
|
|
td->td_ucred, pname, strlen(pname), &cdev);
|
2007-07-03 17:42:37 +00:00
|
|
|
sx_sunlock(&clone_drain_lock);
|
2010-08-22 16:08:12 +00:00
|
|
|
|
|
|
|
if (cdev == NULL)
|
|
|
|
sx_xlock(&dmp->dm_lock);
|
|
|
|
else if (devfs_populate_vp(dvp) != 0) {
|
|
|
|
*dm_unlock = 0;
|
|
|
|
sx_xlock(&dmp->dm_lock);
|
|
|
|
if (DEVFS_DMP_DROP(dmp)) {
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
devfs_unmount_final(dmp);
|
|
|
|
} else
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
dev_rel(cdev);
|
|
|
|
return (ENOENT);
|
|
|
|
}
|
2007-07-03 17:42:37 +00:00
|
|
|
if (DEVFS_DMP_DROP(dmp)) {
|
|
|
|
*dm_unlock = 0;
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
devfs_unmount_final(dmp);
|
2010-08-19 16:39:00 +00:00
|
|
|
if (cdev != NULL)
|
|
|
|
dev_rel(cdev);
|
2007-07-03 17:42:37 +00:00
|
|
|
return (ENOENT);
|
|
|
|
}
|
2010-08-22 16:08:12 +00:00
|
|
|
|
Revamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liaison
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
if (cdev == NULL)
|
|
|
|
break;
|
2000-08-20 21:34:39 +00:00
|
|
|
|
Revamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liaison
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
dev_lock();
|
2008-06-16 17:34:59 +00:00
|
|
|
dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx];
|
Revamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liaison
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
if (dde != NULL && *dde != NULL)
|
|
|
|
de = *dde;
|
|
|
|
dev_unlock();
|
|
|
|
dev_rel(cdev);
|
|
|
|
break;
|
2000-08-27 14:46:36 +00:00
|
|
|
}
|
|
|
|
|
Revamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liaison
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
if (de == NULL || de->de_flags & DE_WHITEOUT) {
|
|
|
|
if ((nameiop == CREATE || nameiop == RENAME) &&
|
|
|
|
(flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
|
|
|
|
cnp->cn_flags |= SAVENAME;
|
|
|
|
return (EJUSTRETURN);
|
|
|
|
}
|
|
|
|
return (ENOENT);
|
|
|
|
}
|
2000-08-27 14:46:36 +00:00
|
|
|
|
Let access overriding to TTYs depend on the cdev_priv, not the vnode.
Basically this commit changes two things, which improves access to TTYs
in exceptional conditions. Basically the problem was that when you ran
jexec(8) to attach to a jail, you couldn't use /dev/tty (well, also the
node of the actual TTY, e.g. /dev/pts/X). This is very inconvenient if
you want to attach to screens quickly, use ssh(1), etc.
The fixes:
- Cache the cdev_priv of the controlling TTY in struct session. Change
devfs_access() to compare against the cdev_priv instead of the vnode.
This allows you to bypass UNIX permissions, even across different
mounts of devfs.
- Extend devfs_prison_check() to unconditionally expose the device node
of the controlling TTY, even if normal prison nesting rules normally
don't allow this. This actually allows you to interact with this
device node.
To be honest, I'm not really happy with this solution. We now have to
store three pointers to a controlling TTY (s_ttyp, s_ttyvp, s_ttydp).
In an ideal world, we should just get rid of the latter two and only use
s_ttyp, but this makes certain pieces of code very impractical (e.g.
devfs, kern_exit.c).
Reported by: Many people
2009-12-19 18:42:12 +00:00
|
|
|
if (devfs_prison_check(de, td))
|
2009-06-20 14:50:32 +00:00
|
|
|
return (ENOENT);
|
|
|
|
|
2000-08-27 14:46:36 +00:00
|
|
|
if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
|
2001-09-12 08:38:13 +00:00
|
|
|
error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
|
2000-08-27 14:46:36 +00:00
|
|
|
if (error)
|
|
|
|
return (error);
|
|
|
|
if (*vpp == dvp) {
|
|
|
|
VREF(dvp);
|
|
|
|
*vpp = dvp;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
}
|
2010-08-06 09:23:47 +00:00
|
|
|
error = devfs_allocv(de, dvp->v_mount, cnp->cn_lkflags & LK_TYPE_MASK,
|
|
|
|
vpp);
|
2006-09-18 13:23:08 +00:00
|
|
|
*dm_unlock = 0;
|
2005-03-28 09:34:36 +00:00
|
|
|
return (error);
|
2000-08-27 14:46:36 +00:00
|
|
|
}
|
|
|
|
|
2000-09-06 11:26:43 +00:00
|
|
|
static int
|
|
|
|
devfs_lookup(struct vop_lookup_args *ap)
|
|
|
|
{
|
|
|
|
int j;
|
|
|
|
struct devfs_mount *dmp;
|
2006-09-18 13:23:08 +00:00
|
|
|
int dm_unlock;
|
2000-09-06 11:26:43 +00:00
|
|
|
|
2010-08-22 16:08:12 +00:00
|
|
|
if (devfs_populate_vp(ap->a_dvp) != 0)
|
|
|
|
return (ENOTDIR);
|
|
|
|
|
2000-09-06 11:26:43 +00:00
|
|
|
dmp = VFSTODEVFS(ap->a_dvp->v_mount);
|
2006-09-18 13:23:08 +00:00
|
|
|
dm_unlock = 1;
|
|
|
|
j = devfs_lookupx(ap, &dm_unlock);
|
|
|
|
if (dm_unlock == 1)
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
2000-09-06 11:26:43 +00:00
|
|
|
return (j);
|
|
|
|
}
|
|
|
|
|
2001-05-23 17:48:20 +00:00
|
|
|
static int
|
|
|
|
devfs_mknod(struct vop_mknod_args *ap)
|
|
|
|
{
|
|
|
|
struct componentname *cnp;
|
|
|
|
struct vnode *dvp, **vpp;
|
|
|
|
struct devfs_dirent *dd, *de;
|
|
|
|
struct devfs_mount *dmp;
|
|
|
|
int error;
|
|
|
|
|
2005-07-20 13:34:16 +00:00
|
|
|
/*
|
|
|
|
* The only type of node we should be creating here is a
|
|
|
|
* character device, for anything else return EOPNOTSUPP.
|
|
|
|
*/
|
|
|
|
if (ap->a_vap->va_type != VCHR)
|
|
|
|
return (EOPNOTSUPP);
|
2001-05-23 17:48:20 +00:00
|
|
|
dvp = ap->a_dvp;
|
|
|
|
dmp = VFSTODEVFS(dvp->v_mount);
|
|
|
|
|
|
|
|
cnp = ap->a_cnp;
|
|
|
|
vpp = ap->a_vpp;
|
|
|
|
dd = dvp->v_data;
|
2003-03-02 15:56:49 +00:00
|
|
|
|
2001-05-23 17:48:20 +00:00
|
|
|
error = ENOENT;
|
2006-09-18 13:23:08 +00:00
|
|
|
sx_xlock(&dmp->dm_lock);
|
2001-05-23 17:48:20 +00:00
|
|
|
TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
|
|
|
|
if (cnp->cn_namelen != de->de_dirent->d_namlen)
|
|
|
|
continue;
|
2015-01-19 17:24:52 +00:00
|
|
|
if (de->de_dirent->d_type == DT_CHR &&
|
|
|
|
(de->de_cdp->cdp_flags & CDP_ACTIVE) == 0)
|
|
|
|
continue;
|
2001-05-23 17:48:20 +00:00
|
|
|
if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
|
|
|
|
de->de_dirent->d_namlen) != 0)
|
|
|
|
continue;
|
|
|
|
if (de->de_flags & DE_WHITEOUT)
|
|
|
|
break;
|
|
|
|
goto notfound;
|
|
|
|
}
|
|
|
|
if (de == NULL)
|
|
|
|
goto notfound;
|
|
|
|
de->de_flags &= ~DE_WHITEOUT;
|
2010-08-06 09:23:47 +00:00
|
|
|
error = devfs_allocv(de, dvp->v_mount, LK_EXCLUSIVE, vpp);
|
2006-09-18 13:23:08 +00:00
|
|
|
return (error);
|
2001-05-23 17:48:20 +00:00
|
|
|
notfound:
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
sx_xunlock(&dmp->dm_lock);
|
2001-05-23 17:48:20 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2004-10-22 09:59:37 +00:00
|
|
|
/* ARGSUSED */
|
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_open(struct vop_open_args *ap)
|
2004-10-22 09:59:37 +00:00
|
|
|
{
|
|
|
|
struct thread *td = ap->a_td;
|
|
|
|
struct vnode *vp = ap->a_vp;
|
|
|
|
struct cdev *dev = vp->v_rdev;
|
2007-05-31 11:51:53 +00:00
|
|
|
struct file *fp = ap->a_fp;
|
2010-08-06 09:42:15 +00:00
|
|
|
int error, ref, vlocked;
|
2004-10-22 09:59:37 +00:00
|
|
|
struct cdevsw *dsw;
|
2008-09-26 14:50:49 +00:00
|
|
|
struct file *fpop;
|
2012-12-23 22:43:27 +00:00
|
|
|
struct mtx *mtxp;
|
2004-10-22 09:59:37 +00:00
|
|
|
|
|
|
|
if (vp->v_type == VBLK)
|
|
|
|
return (ENXIO);
|
|
|
|
|
|
|
|
if (dev == NULL)
|
|
|
|
return (ENXIO);
|
|
|
|
|
|
|
|
/* Make this field valid before any I/O in d_open. */
|
|
|
|
if (dev->si_iosize_max == 0)
|
|
|
|
dev->si_iosize_max = DFLTPHYS;
|
|
|
|
|
2010-08-06 09:42:15 +00:00
|
|
|
dsw = dev_refthread(dev, &ref);
|
2004-10-22 09:59:37 +00:00
|
|
|
if (dsw == NULL)
|
|
|
|
return (ENXIO);
|
2011-11-03 18:55:18 +00:00
|
|
|
if (fp == NULL && dsw->d_fdopen != NULL) {
|
|
|
|
dev_relthread(dev, ref);
|
|
|
|
return (ENXIO);
|
|
|
|
}
|
2004-10-22 09:59:37 +00:00
|
|
|
|
2010-08-06 09:23:47 +00:00
|
|
|
vlocked = VOP_ISLOCKED(vp);
|
2008-01-13 14:44:15 +00:00
|
|
|
VOP_UNLOCK(vp, 0);
|
2004-10-22 09:59:37 +00:00
|
|
|
|
2008-09-26 14:50:49 +00:00
|
|
|
fpop = td->td_fpop;
|
|
|
|
td->td_fpop = fp;
|
2009-05-15 19:23:05 +00:00
|
|
|
if (fp != NULL) {
|
2008-05-21 09:31:44 +00:00
|
|
|
fp->f_data = dev;
|
2009-05-15 19:23:05 +00:00
|
|
|
fp->f_vnode = vp;
|
|
|
|
}
|
2008-05-07 19:03:57 +00:00
|
|
|
if (dsw->d_fdopen != NULL)
|
|
|
|
error = dsw->d_fdopen(dev, ap->a_mode, td, fp);
|
|
|
|
else
|
|
|
|
error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
|
2012-08-15 16:19:39 +00:00
|
|
|
/* cleanup any cdevpriv upon error */
|
|
|
|
if (error != 0)
|
|
|
|
devfs_clear_cdevpriv();
|
2008-09-26 14:50:49 +00:00
|
|
|
td->td_fpop = fpop;
|
2004-10-22 09:59:37 +00:00
|
|
|
|
2010-08-06 09:23:47 +00:00
|
|
|
vn_lock(vp, vlocked | LK_RETRY);
|
2010-08-06 09:42:15 +00:00
|
|
|
dev_relthread(dev, ref);
|
2013-02-07 14:53:33 +00:00
|
|
|
if (error != 0) {
|
|
|
|
if (error == ERESTART)
|
|
|
|
error = EINTR;
|
2004-10-22 09:59:37 +00:00
|
|
|
return (error);
|
2013-02-07 14:53:33 +00:00
|
|
|
}
|
2004-10-22 09:59:37 +00:00
|
|
|
|
2004-12-14 09:32:18 +00:00
|
|
|
#if 0 /* /dev/console */
|
2010-08-06 09:23:47 +00:00
|
|
|
KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp"));
|
2004-12-14 09:32:18 +00:00
|
|
|
#else
|
2010-08-06 09:23:47 +00:00
|
|
|
if (fp == NULL)
|
2004-12-14 09:32:18 +00:00
|
|
|
return (error);
|
2004-11-08 10:46:47 +00:00
|
|
|
#endif
|
Merge back devfs changes from the mpsafetty branch.
In the mpsafetty branch, PTY's are allocated through the posix_openpt()
system call. The controller side of a PTY now uses its own file
descriptor type (just like sockets, vnodes, pipes, etc).
To remain compatible with existing FreeBSD and Linux C libraries, we can
still create PTY's by opening /dev/ptmx or /dev/ptyXX. These nodes
implement d_fdopen(). Devfs has been slightly changed here, to allow
finit() to be called from d_fdopen().
The routine grantpt() has also been moved into the kernel. This routine
is a little odd, because it needs to bypass standard UNIX permissions.
It needs to change the owner/group/mode of the slave device node, which
may often not be possible. The old implementation solved this by
spawning a setuid utility.
When VOP_SETATTR() is called with NOCRED, devfs_setattr() dereferences
ap->a_cred, causing a kernel panic. Change the de_{uid,gid,mode} code to
allow changes when a->a_cred is set to NOCRED.
Approved by: philip (mentor)
2008-05-31 14:06:37 +00:00
|
|
|
if (fp->f_ops == &badfileops)
|
|
|
|
finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f);
|
2012-12-23 22:43:27 +00:00
|
|
|
mtxp = mtx_pool_find(mtxpool_sleep, fp);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Hint to the dofilewrite() to not force the buffer draining
|
|
|
|
* on the writer to the file. Most likely, the write would
|
|
|
|
* not need normal buffers.
|
|
|
|
*/
|
|
|
|
mtx_lock(mtxp);
|
|
|
|
fp->f_vnread_flags |= FDEVFS_VNODE;
|
|
|
|
mtx_unlock(mtxp);
|
2004-10-22 09:59:37 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2001-11-25 21:00:38 +00:00
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_pathconf(struct vop_pathconf_args *ap)
|
2001-11-25 21:00:38 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
switch (ap->a_name) {
|
2002-10-20 22:50:43 +00:00
|
|
|
case _PC_MAC_PRESENT:
|
2002-08-02 03:12:40 +00:00
|
|
|
#ifdef MAC
|
|
|
|
/*
|
|
|
|
* If MAC is enabled, devfs automatically supports
|
|
|
|
* trivial non-persistant label storage.
|
|
|
|
*/
|
|
|
|
*ap->a_retval = 1;
|
|
|
|
#else
|
|
|
|
*ap->a_retval = 0;
|
2002-10-20 22:50:43 +00:00
|
|
|
#endif
|
2002-08-02 03:12:40 +00:00
|
|
|
return (0);
|
2001-11-25 21:00:38 +00:00
|
|
|
default:
|
|
|
|
return (vop_stdpathconf(ap));
|
|
|
|
}
|
|
|
|
/* NOTREACHED */
|
|
|
|
}
|
|
|
|
|
2004-10-22 09:59:37 +00:00
|
|
|
/* ARGSUSED */
|
2004-11-08 10:46:47 +00:00
|
|
|
static int
|
|
|
|
devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
|
|
|
|
{
|
|
|
|
struct cdev *dev;
|
|
|
|
struct cdevsw *dsw;
|
2010-08-06 09:42:15 +00:00
|
|
|
int error, ref;
|
2008-09-26 14:50:49 +00:00
|
|
|
struct file *fpop;
|
2004-11-08 10:46:47 +00:00
|
|
|
|
2008-09-26 14:50:49 +00:00
|
|
|
fpop = td->td_fpop;
|
2010-08-06 09:42:15 +00:00
|
|
|
error = devfs_fp_check(fp, &dev, &dsw, &ref);
|
2014-10-15 13:16:51 +00:00
|
|
|
if (error != 0) {
|
|
|
|
error = vnops.fo_poll(fp, events, cred, td);
|
|
|
|
return (error);
|
|
|
|
}
|
2004-11-08 10:46:47 +00:00
|
|
|
error = dsw->d_poll(dev, events, td);
|
2008-09-26 14:50:49 +00:00
|
|
|
td->td_fpop = fpop;
|
2010-08-06 09:42:15 +00:00
|
|
|
dev_relthread(dev, ref);
|
2004-11-08 10:46:47 +00:00
|
|
|
return(error);
|
|
|
|
}
|
|
|
|
|
2004-10-22 09:59:37 +00:00
|
|
|
/*
|
|
|
|
* Print out the contents of a special device vnode.
|
|
|
|
*/
|
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_print(struct vop_print_args *ap)
|
2004-10-22 09:59:37 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2004-11-08 10:46:47 +00:00
|
|
|
static int
|
2013-10-15 06:28:11 +00:00
|
|
|
devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred,
|
|
|
|
int flags, struct thread *td)
|
2004-11-08 10:46:47 +00:00
|
|
|
{
|
|
|
|
struct cdev *dev;
|
2012-02-21 01:05:12 +00:00
|
|
|
int ioflag, error, ref;
|
|
|
|
ssize_t resid;
|
2004-11-08 10:46:47 +00:00
|
|
|
struct cdevsw *dsw;
|
2008-09-26 14:50:49 +00:00
|
|
|
struct file *fpop;
|
2004-11-08 10:46:47 +00:00
|
|
|
|
2013-10-15 06:33:10 +00:00
|
|
|
if (uio->uio_resid > DEVFS_IOSIZE_MAX)
|
|
|
|
return (EINVAL);
|
2008-09-26 14:50:49 +00:00
|
|
|
fpop = td->td_fpop;
|
2010-08-06 09:42:15 +00:00
|
|
|
error = devfs_fp_check(fp, &dev, &dsw, &ref);
|
2014-10-15 13:16:51 +00:00
|
|
|
if (error != 0) {
|
|
|
|
error = vnops.fo_read(fp, uio, cred, flags, td);
|
2004-11-13 23:21:54 +00:00
|
|
|
return (error);
|
2014-10-15 13:16:51 +00:00
|
|
|
}
|
2004-11-08 10:46:47 +00:00
|
|
|
resid = uio->uio_resid;
|
2004-12-22 17:05:44 +00:00
|
|
|
ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
|
|
|
|
if (ioflag & O_DIRECT)
|
2004-11-08 10:46:47 +00:00
|
|
|
ioflag |= IO_DIRECT;
|
|
|
|
|
2012-07-02 21:01:03 +00:00
|
|
|
foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
|
2004-11-08 10:46:47 +00:00
|
|
|
error = dsw->d_read(dev, uio, ioflag);
|
2015-04-01 08:25:40 +00:00
|
|
|
if (uio->uio_resid != resid || (error == 0 && resid != 0))
|
|
|
|
devfs_timestamp(&dev->si_atime);
|
2008-09-26 14:50:49 +00:00
|
|
|
td->td_fpop = fpop;
|
2010-08-06 09:42:15 +00:00
|
|
|
dev_relthread(dev, ref);
|
2004-11-08 10:46:47 +00:00
|
|
|
|
2012-07-02 21:01:03 +00:00
|
|
|
foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
|
2004-11-08 10:46:47 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2000-08-20 21:34:39 +00:00
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_readdir(struct vop_readdir_args *ap)
|
2000-08-20 21:34:39 +00:00
|
|
|
{
|
2000-08-26 16:20:57 +00:00
|
|
|
int error;
|
|
|
|
struct uio *uio;
|
2000-08-20 21:34:39 +00:00
|
|
|
struct dirent *dp;
|
2000-08-24 15:36:55 +00:00
|
|
|
struct devfs_dirent *dd;
|
2000-08-20 21:34:39 +00:00
|
|
|
struct devfs_dirent *de;
|
2000-08-24 15:36:55 +00:00
|
|
|
struct devfs_mount *dmp;
|
2009-01-31 17:36:22 +00:00
|
|
|
off_t off;
|
2005-11-09 22:03:50 +00:00
|
|
|
int *tmp_ncookies = NULL;
|
2000-08-20 21:34:39 +00:00
|
|
|
|
|
|
|
if (ap->a_vp->v_type != VDIR)
|
|
|
|
return (ENOTDIR);
|
|
|
|
|
2000-08-26 16:20:57 +00:00
|
|
|
uio = ap->a_uio;
|
|
|
|
if (uio->uio_offset < 0)
|
|
|
|
return (EINVAL);
|
|
|
|
|
2005-11-09 22:03:50 +00:00
|
|
|
/*
|
|
|
|
* XXX: This is a temporary hack to get around this filesystem not
|
|
|
|
* supporting cookies. We store the location of the ncookies pointer
|
|
|
|
* in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
|
|
|
|
* and set the number of cookies to 0. We then set the pointer to
|
|
|
|
* NULL so that vfs_read_dirent doesn't try to call realloc() on
|
|
|
|
* ap->a_cookies. Later in this function, we restore the ap->a_ncookies
|
|
|
|
* pointer to its original location before returning to the caller.
|
|
|
|
*/
|
|
|
|
if (ap->a_ncookies != NULL) {
|
|
|
|
tmp_ncookies = ap->a_ncookies;
|
|
|
|
*ap->a_ncookies = 0;
|
|
|
|
ap->a_ncookies = NULL;
|
|
|
|
}
|
|
|
|
|
2000-08-24 15:36:55 +00:00
|
|
|
dmp = VFSTODEVFS(ap->a_vp->v_mount);
|
2010-08-22 16:08:12 +00:00
|
|
|
if (devfs_populate_vp(ap->a_vp) != 0) {
|
2006-10-18 11:17:14 +00:00
|
|
|
if (tmp_ncookies != NULL)
|
|
|
|
ap->a_ncookies = tmp_ncookies;
|
|
|
|
return (EIO);
|
|
|
|
}
|
2000-08-20 21:34:39 +00:00
|
|
|
error = 0;
|
2000-08-24 15:36:55 +00:00
|
|
|
de = ap->a_vp->v_data;
|
2000-08-20 21:34:39 +00:00
|
|
|
off = 0;
|
2001-05-23 17:48:20 +00:00
|
|
|
TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
|
2010-08-12 15:29:07 +00:00
|
|
|
if (dd->de_flags & (DE_COVERED | DE_WHITEOUT))
|
2001-05-23 17:48:20 +00:00
|
|
|
continue;
|
Let access overriding to TTYs depend on the cdev_priv, not the vnode.
Basically this commit changes two things, which improves access to TTYs
in exceptional conditions. Basically the problem was that when you ran
jexec(8) to attach to a jail, you couldn't use /dev/tty (well, also the
node of the actual TTY, e.g. /dev/pts/X). This is very inconvenient if
you want to attach to screens quickly, use ssh(1), etc.
The fixes:
- Cache the cdev_priv of the controlling TTY in struct session. Change
devfs_access() to compare against the cdev_priv instead of the vnode.
This allows you to bypass UNIX permissions, even across different
mounts of devfs.
- Extend devfs_prison_check() to unconditionally expose the device node
of the controlling TTY, even if normal prison nesting rules normally
don't allow this. This actually allows you to interact with this
device node.
To be honest, I'm not really happy with this solution. We now have to
store three pointers to a controlling TTY (s_ttyp, s_ttyvp, s_ttydp).
In an ideal world, we should just get rid of the latter two and only use
s_ttyp, but this makes certian pieces of code very impractical (e.g.
devfs, kern_exit.c).
Reported by: Many people
2009-12-19 18:42:12 +00:00
|
|
|
if (devfs_prison_check(dd, uio->uio_td))
|
2009-06-20 14:50:32 +00:00
|
|
|
continue;
|
2000-10-09 14:18:07 +00:00
|
|
|
if (dd->de_dirent->d_type == DT_DIR)
|
2000-08-24 15:36:55 +00:00
|
|
|
de = dd->de_dir;
|
|
|
|
else
|
|
|
|
de = dd;
|
|
|
|
dp = dd->de_dirent;
|
2000-08-26 16:20:57 +00:00
|
|
|
if (dp->d_reclen > uio->uio_resid)
|
|
|
|
break;
|
2000-08-20 21:34:39 +00:00
|
|
|
dp->d_fileno = de->de_inode;
|
2000-08-26 16:20:57 +00:00
|
|
|
if (off >= uio->uio_offset) {
|
2005-09-12 08:03:15 +00:00
|
|
|
error = vfs_read_dirent(ap, dp, off);
|
2000-08-26 16:20:57 +00:00
|
|
|
if (error)
|
2000-08-20 21:34:39 +00:00
|
|
|
break;
|
2000-08-26 16:20:57 +00:00
|
|
|
}
|
2000-08-20 21:34:39 +00:00
|
|
|
off += dp->d_reclen;
|
|
|
|
}
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
sx_xunlock(&dmp->dm_lock);
|
2000-08-20 21:34:39 +00:00
|
|
|
uio->uio_offset = off;
|
2005-11-09 22:03:50 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Restore ap->a_ncookies if it wasn't originally NULL in the first
|
|
|
|
* place.
|
|
|
|
*/
|
|
|
|
if (tmp_ncookies != NULL)
|
|
|
|
ap->a_ncookies = tmp_ncookies;
|
|
|
|
|
2000-08-20 21:34:39 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_readlink(struct vop_readlink_args *ap)
|
2000-08-20 21:34:39 +00:00
|
|
|
{
|
|
|
|
struct devfs_dirent *de;
|
|
|
|
|
|
|
|
de = ap->a_vp->v_data;
|
2005-09-15 10:28:19 +00:00
|
|
|
return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio));
|
2000-08-20 21:34:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_reclaim(struct vop_reclaim_args *ap)
|
2000-08-20 21:34:39 +00:00
|
|
|
{
|
|
|
|
struct vnode *vp = ap->a_vp;
|
2000-08-24 15:36:55 +00:00
|
|
|
struct devfs_dirent *de;
|
2005-02-22 15:51:07 +00:00
|
|
|
struct cdev *dev;
|
2006-10-18 11:17:14 +00:00
|
|
|
|
2006-07-12 20:25:35 +00:00
|
|
|
mtx_lock(&devfs_de_interlock);
|
2000-08-24 15:36:55 +00:00
|
|
|
de = vp->v_data;
|
2006-07-12 20:25:35 +00:00
|
|
|
if (de != NULL) {
|
2000-10-09 14:18:07 +00:00
|
|
|
de->de_vnode = NULL;
|
2006-07-12 20:25:35 +00:00
|
|
|
vp->v_data = NULL;
|
|
|
|
}
|
|
|
|
mtx_unlock(&devfs_de_interlock);
|
|
|
|
|
2005-01-28 14:42:17 +00:00
|
|
|
vnode_destroy_vobject(vp);
|
2005-02-22 15:51:07 +00:00
|
|
|
|
2008-01-08 04:45:24 +00:00
|
|
|
VI_LOCK(vp);
|
2006-10-20 07:59:50 +00:00
|
|
|
dev_lock();
|
2005-02-22 15:51:07 +00:00
|
|
|
dev = vp->v_rdev;
|
|
|
|
vp->v_rdev = NULL;
|
|
|
|
|
2006-10-20 07:59:50 +00:00
|
|
|
if (dev == NULL) {
|
|
|
|
dev_unlock();
|
2008-01-08 04:45:24 +00:00
|
|
|
VI_UNLOCK(vp);
|
2005-02-22 18:17:31 +00:00
|
|
|
return (0);
|
2006-10-20 07:59:50 +00:00
|
|
|
}
|
2005-02-22 18:17:31 +00:00
|
|
|
|
2005-02-22 15:51:07 +00:00
|
|
|
dev->si_usecount -= vp->v_usecount;
|
|
|
|
dev_unlock();
|
2008-01-08 04:45:24 +00:00
|
|
|
VI_UNLOCK(vp);
|
2005-02-22 15:51:07 +00:00
|
|
|
dev_rel(dev);
|
2000-08-20 21:34:39 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_remove(struct vop_remove_args *ap)
|
2000-08-20 21:34:39 +00:00
|
|
|
{
|
2010-09-15 14:23:55 +00:00
|
|
|
struct vnode *dvp = ap->a_dvp;
|
2000-08-20 21:34:39 +00:00
|
|
|
struct vnode *vp = ap->a_vp;
|
2000-08-24 15:36:55 +00:00
|
|
|
struct devfs_dirent *dd;
|
2010-08-12 15:29:07 +00:00
|
|
|
struct devfs_dirent *de, *de_covered;
|
2000-09-06 11:26:43 +00:00
|
|
|
struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);
|
2000-08-20 21:34:39 +00:00
|
|
|
|
2010-09-15 14:23:55 +00:00
|
|
|
ASSERT_VOP_ELOCKED(dvp, "devfs_remove");
|
|
|
|
ASSERT_VOP_ELOCKED(vp, "devfs_remove");
|
|
|
|
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
sx_xlock(&dmp->dm_lock);
|
2000-08-20 21:34:39 +00:00
|
|
|
dd = ap->a_dvp->v_data;
|
|
|
|
de = vp->v_data;
|
2005-10-18 20:21:25 +00:00
|
|
|
if (de->de_cdp == NULL) {
|
2001-09-30 08:43:33 +00:00
|
|
|
TAILQ_REMOVE(&dd->de_dlist, de, de_list);
|
2010-08-12 15:29:07 +00:00
|
|
|
if (de->de_dirent->d_type == DT_LNK) {
|
|
|
|
de_covered = devfs_find(dd, de->de_dirent->d_name,
|
|
|
|
de->de_dirent->d_namlen, 0);
|
|
|
|
if (de_covered != NULL)
|
|
|
|
de_covered->de_flags &= ~DE_COVERED;
|
|
|
|
}
|
2010-09-15 14:23:55 +00:00
|
|
|
/* We need to unlock dvp because devfs_delete() may lock it. */
|
|
|
|
VOP_UNLOCK(vp, 0);
|
|
|
|
if (dvp != vp)
|
|
|
|
VOP_UNLOCK(dvp, 0);
|
|
|
|
devfs_delete(dmp, de, 0);
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
if (dvp != vp)
|
|
|
|
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
|
|
|
|
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
2001-09-30 08:43:33 +00:00
|
|
|
} else {
|
|
|
|
de->de_flags |= DE_WHITEOUT;
|
2010-09-15 14:23:55 +00:00
|
|
|
sx_xunlock(&dmp->dm_lock);
|
2001-09-30 08:43:33 +00:00
|
|
|
}
|
2000-08-20 21:34:39 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Revoke is called on a tty when a terminal session ends. The vnode
|
|
|
|
* is orphaned by setting v_op to deadfs so we need to let go of it
|
|
|
|
* as well so that we create a new one next time around.
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
*
|
2000-08-20 21:34:39 +00:00
|
|
|
*/
|
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_revoke(struct vop_revoke_args *ap)
|
2000-08-20 21:34:39 +00:00
|
|
|
{
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
struct vnode *vp = ap->a_vp, *vp2;
|
2004-11-13 23:37:29 +00:00
|
|
|
struct cdev *dev;
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
struct cdev_priv *cdp;
|
2005-02-22 15:51:07 +00:00
|
|
|
struct devfs_dirent *de;
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
int i;
|
2000-08-20 21:34:39 +00:00
|
|
|
|
2004-11-13 23:37:29 +00:00
|
|
|
KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));
|
|
|
|
|
|
|
|
dev = vp->v_rdev;
|
2008-06-16 17:34:59 +00:00
|
|
|
cdp = cdev2priv(dev);
|
2006-10-18 11:17:14 +00:00
|
|
|
|
|
|
|
dev_lock();
|
|
|
|
cdp->cdp_inuse++;
|
|
|
|
dev_unlock();
|
|
|
|
|
|
|
|
vhold(vp);
|
|
|
|
vgone(vp);
|
|
|
|
vdrop(vp);
|
|
|
|
|
2008-01-13 14:44:15 +00:00
|
|
|
VOP_UNLOCK(vp,0);
|
2006-10-18 11:17:14 +00:00
|
|
|
loop:
|
2004-11-13 23:37:29 +00:00
|
|
|
for (;;) {
|
2006-07-12 20:25:35 +00:00
|
|
|
mtx_lock(&devfs_de_interlock);
|
2004-11-13 23:37:29 +00:00
|
|
|
dev_lock();
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
vp2 = NULL;
|
|
|
|
for (i = 0; i <= cdp->cdp_maxdirent; i++) {
|
|
|
|
de = cdp->cdp_dirents[i];
|
|
|
|
if (de == NULL)
|
|
|
|
continue;
|
2006-07-12 20:25:35 +00:00
|
|
|
|
2006-10-18 11:17:14 +00:00
|
|
|
vp2 = de->de_vnode;
|
2006-07-12 20:25:35 +00:00
|
|
|
if (vp2 != NULL) {
|
|
|
|
dev_unlock();
|
|
|
|
VI_LOCK(vp2);
|
|
|
|
mtx_unlock(&devfs_de_interlock);
|
2006-10-18 11:17:14 +00:00
|
|
|
if (vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK,
|
|
|
|
curthread))
|
|
|
|
goto loop;
|
|
|
|
vhold(vp2);
|
2006-07-12 20:25:35 +00:00
|
|
|
vgone(vp2);
|
|
|
|
vdrop(vp2);
|
2006-10-18 11:17:14 +00:00
|
|
|
vput(vp2);
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
break;
|
2006-10-18 11:17:14 +00:00
|
|
|
}
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
}
|
|
|
|
if (vp2 != NULL) {
|
|
|
|
continue;
|
|
|
|
}
|
2006-07-12 20:25:35 +00:00
|
|
|
dev_unlock();
|
|
|
|
mtx_unlock(&devfs_de_interlock);
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
break;
|
2004-11-13 23:37:29 +00:00
|
|
|
}
|
2006-10-18 11:17:14 +00:00
|
|
|
dev_lock();
|
|
|
|
cdp->cdp_inuse--;
|
|
|
|
if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) {
|
|
|
|
TAILQ_REMOVE(&cdevp_list, cdp, cdp_list);
|
|
|
|
dev_unlock();
|
|
|
|
dev_rel(&cdp->cdp_c);
|
|
|
|
} else
|
|
|
|
dev_unlock();
|
|
|
|
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
2000-08-20 21:34:39 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2004-10-22 09:59:37 +00:00
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_rioctl(struct vop_ioctl_args *ap)
|
2004-10-22 09:59:37 +00:00
|
|
|
{
|
2009-06-10 13:57:36 +00:00
|
|
|
struct vnode *vp;
|
2004-10-22 09:59:37 +00:00
|
|
|
struct devfs_mount *dmp;
|
2009-06-10 13:57:36 +00:00
|
|
|
int error;
|
2004-10-22 09:59:37 +00:00
|
|
|
|
2009-06-10 13:57:36 +00:00
|
|
|
vp = ap->a_vp;
|
|
|
|
vn_lock(vp, LK_SHARED | LK_RETRY);
|
|
|
|
if (vp->v_iflag & VI_DOOMED) {
|
|
|
|
VOP_UNLOCK(vp, 0);
|
|
|
|
return (EBADF);
|
|
|
|
}
|
|
|
|
dmp = VFSTODEVFS(vp->v_mount);
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
sx_xlock(&dmp->dm_lock);
|
2009-06-10 13:57:36 +00:00
|
|
|
VOP_UNLOCK(vp, 0);
|
2006-10-18 11:17:14 +00:00
|
|
|
DEVFS_DMP_HOLD(dmp);
|
2004-10-22 09:59:37 +00:00
|
|
|
devfs_populate(dmp);
|
2006-10-18 11:17:14 +00:00
|
|
|
if (DEVFS_DMP_DROP(dmp)) {
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
devfs_unmount_final(dmp);
|
|
|
|
return (ENOENT);
|
|
|
|
}
|
2005-09-15 10:28:19 +00:00
|
|
|
error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
sx_xunlock(&dmp->dm_lock);
|
2004-10-22 09:59:37 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_rread(struct vop_read_args *ap)
|
2004-10-22 09:59:37 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
if (ap->a_vp->v_type != VDIR)
|
|
|
|
return (EINVAL);
|
|
|
|
return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
|
|
|
|
}
|
|
|
|
|
2000-08-27 14:46:36 +00:00
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_setattr(struct vop_setattr_args *ap)
|
2000-08-27 14:46:36 +00:00
|
|
|
{
|
|
|
|
struct devfs_dirent *de;
|
2000-09-16 12:06:58 +00:00
|
|
|
struct vattr *vap;
|
2001-11-03 17:00:02 +00:00
|
|
|
struct vnode *vp;
|
2008-08-28 15:23:18 +00:00
|
|
|
struct thread *td;
|
2000-09-16 12:06:58 +00:00
|
|
|
int c, error;
|
|
|
|
uid_t uid;
|
|
|
|
gid_t gid;
|
|
|
|
|
|
|
|
vap = ap->a_vap;
|
2001-11-03 17:00:02 +00:00
|
|
|
vp = ap->a_vp;
|
2008-08-28 15:23:18 +00:00
|
|
|
td = curthread;
|
2000-09-16 12:06:58 +00:00
|
|
|
if ((vap->va_type != VNON) ||
|
|
|
|
(vap->va_nlink != VNOVAL) ||
|
|
|
|
(vap->va_fsid != VNOVAL) ||
|
|
|
|
(vap->va_fileid != VNOVAL) ||
|
|
|
|
(vap->va_blocksize != VNOVAL) ||
|
2000-09-18 09:40:01 +00:00
|
|
|
(vap->va_flags != VNOVAL && vap->va_flags != 0) ||
|
2000-09-16 12:06:58 +00:00
|
|
|
(vap->va_rdev != VNOVAL) ||
|
|
|
|
((int)vap->va_bytes != VNOVAL) ||
|
|
|
|
(vap->va_gen != VNOVAL)) {
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
2000-08-27 14:46:36 +00:00
|
|
|
|
2001-11-03 17:00:02 +00:00
|
|
|
de = vp->v_data;
|
|
|
|
if (vp->v_type == VDIR)
|
2000-08-27 14:46:36 +00:00
|
|
|
de = de->de_dir;
|
|
|
|
|
2000-09-16 12:06:58 +00:00
|
|
|
error = c = 0;
|
|
|
|
if (vap->va_uid == (uid_t)VNOVAL)
|
|
|
|
uid = de->de_uid;
|
|
|
|
else
|
|
|
|
uid = vap->va_uid;
|
|
|
|
if (vap->va_gid == (gid_t)VNOVAL)
|
|
|
|
gid = de->de_gid;
|
|
|
|
else
|
|
|
|
gid = vap->va_gid;
|
|
|
|
if (uid != de->de_uid || gid != de->de_gid) {
|
2008-06-01 14:02:46 +00:00
|
|
|
if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
|
|
|
|
(gid != de->de_gid && !groupmember(gid, ap->a_cred))) {
|
2008-08-28 15:23:18 +00:00
|
|
|
error = priv_check(td, PRIV_VFS_CHOWN);
|
2006-11-06 13:42:10 +00:00
|
|
|
if (error)
|
|
|
|
return (error);
|
|
|
|
}
|
2000-09-16 12:06:58 +00:00
|
|
|
de->de_uid = uid;
|
|
|
|
de->de_gid = gid;
|
2000-08-27 14:46:36 +00:00
|
|
|
c = 1;
|
|
|
|
}
|
2001-11-03 17:00:02 +00:00
|
|
|
|
2000-09-16 12:06:58 +00:00
|
|
|
if (vap->va_mode != (mode_t)VNOVAL) {
|
2008-06-01 14:02:46 +00:00
|
|
|
if (ap->a_cred->cr_uid != de->de_uid) {
|
2008-08-28 15:23:18 +00:00
|
|
|
error = priv_check(td, PRIV_VFS_ADMIN);
|
2006-11-06 13:42:10 +00:00
|
|
|
if (error)
|
|
|
|
return (error);
|
|
|
|
}
|
2000-09-16 12:06:58 +00:00
|
|
|
de->de_mode = vap->va_mode;
|
2000-08-27 14:46:36 +00:00
|
|
|
c = 1;
|
|
|
|
}
|
2002-04-05 15:16:08 +00:00
|
|
|
|
|
|
|
if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
|
2014-06-17 07:11:00 +00:00
|
|
|
error = vn_utimes_perm(vp, vap, ap->a_cred, td);
|
|
|
|
if (error != 0)
|
2000-09-16 12:06:58 +00:00
|
|
|
return (error);
|
2002-04-05 15:16:08 +00:00
|
|
|
if (vap->va_atime.tv_sec != VNOVAL) {
|
|
|
|
if (vp->v_type == VCHR)
|
|
|
|
vp->v_rdev->si_atime = vap->va_atime;
|
|
|
|
else
|
|
|
|
de->de_atime = vap->va_atime;
|
|
|
|
}
|
|
|
|
if (vap->va_mtime.tv_sec != VNOVAL) {
|
|
|
|
if (vp->v_type == VCHR)
|
|
|
|
vp->v_rdev->si_mtime = vap->va_mtime;
|
|
|
|
else
|
|
|
|
de->de_mtime = vap->va_mtime;
|
|
|
|
}
|
2000-08-27 14:46:36 +00:00
|
|
|
c = 1;
|
|
|
|
}
|
|
|
|
|
2002-04-05 15:16:08 +00:00
|
|
|
if (c) {
|
|
|
|
if (vp->v_type == VCHR)
|
|
|
|
vfs_timestamp(&vp->v_rdev->si_ctime);
|
|
|
|
else
|
|
|
|
vfs_timestamp(&de->de_mtime);
|
|
|
|
}
|
2000-08-27 14:46:36 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2002-07-31 15:45:16 +00:00
|
|
|
#ifdef MAC
|
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_setlabel(struct vop_setlabel_args *ap)
|
2002-07-31 15:45:16 +00:00
|
|
|
{
|
|
|
|
struct vnode *vp;
|
|
|
|
struct devfs_dirent *de;
|
|
|
|
|
|
|
|
vp = ap->a_vp;
|
|
|
|
de = vp->v_data;
|
|
|
|
|
2007-10-24 19:04:04 +00:00
|
|
|
mac_vnode_relabel(ap->a_cred, vp, ap->a_label);
|
|
|
|
mac_devfs_update(vp->v_mount, de, vp);
|
2002-07-31 15:45:16 +00:00
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2004-11-08 10:46:47 +00:00
|
|
|
static int
|
|
|
|
devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (vnops.fo_stat(fp, sb, cred, td));
|
|
|
|
}
|
|
|
|
|
2000-08-20 21:34:39 +00:00
|
|
|
static int
|
2005-09-12 08:03:15 +00:00
|
|
|
devfs_symlink(struct vop_symlink_args *ap)
|
2000-08-20 21:34:39 +00:00
|
|
|
{
|
2001-02-02 18:35:29 +00:00
|
|
|
int i, error;
|
2000-08-24 15:36:55 +00:00
|
|
|
struct devfs_dirent *dd;
|
2010-08-12 15:29:07 +00:00
|
|
|
struct devfs_dirent *de, *de_covered, *de_dotdot;
|
2000-08-24 15:36:55 +00:00
|
|
|
struct devfs_mount *dmp;
|
2006-11-06 13:42:10 +00:00
|
|
|
|
2009-05-11 15:33:26 +00:00
|
|
|
error = priv_check(curthread, PRIV_DEVFS_SYMLINK);
|
2001-02-02 18:35:29 +00:00
|
|
|
if (error)
|
|
|
|
return(error);
|
2000-08-27 14:46:36 +00:00
|
|
|
dmp = VFSTODEVFS(ap->a_dvp->v_mount);
|
2010-09-15 14:23:55 +00:00
|
|
|
if (devfs_populate_vp(ap->a_dvp) != 0)
|
|
|
|
return (ENOENT);
|
|
|
|
|
2000-08-20 21:34:39 +00:00
|
|
|
dd = ap->a_dvp->v_data;
|
|
|
|
de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
|
2010-09-27 17:47:09 +00:00
|
|
|
de->de_flags = DE_USER;
|
2000-08-20 21:34:39 +00:00
|
|
|
de->de_uid = 0;
|
|
|
|
de->de_gid = 0;
|
2001-02-02 18:35:29 +00:00
|
|
|
de->de_mode = 0755;
|
Rewamp DEVFS internals pretty severely [1].
Give DEVFS a proper inode called struct cdev_priv. It is important
to keep in mind that this "inode" is shared between all DEVFS
mountpoints, therefore it is protected by the global device mutex.
Link the cdev_priv's into a list, protected by the global device
mutex. Keep track of each cdev_priv's state with a flag bit and
of references from mountpoints with a dedicated usecount.
Reap the benefits of much improved kernel memory allocator and the
generally better defined device driver APIs to get rid of the tables
of pointers + serial numbers, their overflow tables, the atomics
to muck about in them and all the trouble that resulted in.
This makes RAM the only limit on how many devices we can have.
The cdev_priv is actually a super struct containing the normal cdev
as the "public" part, and therefore allocation and freeing has moved
to devfs_devs.c from kern_conf.c.
The overall responsibility is (to be) split such that kern/kern_conf.c
is the stuff that deals with drivers and struct cdev and fs/devfs
handles filesystems and struct cdev_priv and their private liason
exposed only in devfs_int.h.
Move the inode number from cdev to cdev_priv and allocate inode
numbers properly with unr. Local dirents in the mountpoints
(directories, symlinks) allocate inodes from the same pool to
guarantee against overlaps.
Various other fields are going to migrate from cdev to cdev_priv
in the future in order to hide them. A few fields may migrate
from devfs_dirent to cdev_priv as well.
Protect the DEVFS mountpoint with an sx lock instead of lockmgr,
this lock also protects the directory tree of the mountpoint.
Give each mountpoint a unique integer index, allocated with unr.
Use it into an array of devfs_dirent pointers in each cdev_priv.
Initially the array points to a single element also inside cdev_priv,
but as more devfs instances are mounted, the array is extended with
malloc(9) as necessary when the filesystem populates its directory
tree.
Retire the cdev alias lists, the cdev_priv now know about all the
relevant devfs_dirents (and their vnodes) and devfs_revoke() will
pick them up from there. We still spelunk into other mountpoints
and fondle their data without 100% good locking. It may make better
sense to vector the revoke event into the tty code and there do a
destroy_dev/make_dev on the tty's devices, but that's for further
study.
Lots of shuffling of stuff and churn of bits for no good reason[2].
XXX: There is still nothing preventing the dev_clone EVENTHANDLER
from being invoked at the same time in two devfs mountpoints. It
is not obvious what the best course of action is here.
XXX: comment out an if statement that lost its body, until I can
find out what should go there so it doesn't do damage in the meantime.
XXX: Leave in a few extra malloc types and KASSERTS to help track
down any remaining issues.
Much testing provided by: Kris
Much confusion caused by (races in): md(4)
[1] You are not supposed to understand anything past this point.
[2] This line should simplify life for the peanut gallery.
2005-09-19 19:56:48 +00:00
|
|
|
de->de_inode = alloc_unr(devfs_inos);
|
2010-08-26 16:01:29 +00:00
|
|
|
de->de_dir = dd;
|
2000-08-20 21:34:39 +00:00
|
|
|
de->de_dirent->d_type = DT_LNK;
|
|
|
|
i = strlen(ap->a_target) + 1;
|
2005-09-15 10:28:19 +00:00
|
|
|
de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
|
2000-08-20 21:34:39 +00:00
|
|
|
bcopy(ap->a_target, de->de_symlink, i);
|
2002-10-05 18:40:10 +00:00
|
|
|
#ifdef MAC
|
2007-10-24 19:04:04 +00:00
|
|
|
mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
|
2002-10-05 18:40:10 +00:00
|
|
|
#endif
|
2010-08-12 15:29:07 +00:00
|
|
|
de_covered = devfs_find(dd, de->de_dirent->d_name,
|
|
|
|
de->de_dirent->d_namlen, 0);
|
|
|
|
if (de_covered != NULL) {
|
2010-12-15 16:49:47 +00:00
|
|
|
if ((de_covered->de_flags & DE_USER) != 0) {
|
|
|
|
devfs_delete(dmp, de, DEVFS_DEL_NORECURSE);
|
|
|
|
sx_xunlock(&dmp->dm_lock);
|
|
|
|
return (EEXIST);
|
|
|
|
}
|
2010-08-12 15:29:07 +00:00
|
|
|
KASSERT((de_covered->de_flags & DE_COVERED) == 0,
|
|
|
|
("devfs_symlink: entry %p already covered", de_covered));
|
|
|
|
de_covered->de_flags |= DE_COVERED;
|
|
|
|
}
|
|
|
|
|
|
|
|
de_dotdot = TAILQ_FIRST(&dd->de_dlist); /* "." */
|
|
|
|
de_dotdot = TAILQ_NEXT(de_dotdot, de_list); /* ".." */
|
|
|
|
TAILQ_INSERT_AFTER(&dd->de_dlist, de_dotdot, de, de_list);
|
2010-09-27 17:47:09 +00:00
|
|
|
devfs_dir_ref_de(dmp, dd);
|
2010-12-15 16:42:44 +00:00
|
|
|
devfs_rules_apply(dmp, de);
|
2010-08-12 15:29:07 +00:00
|
|
|
|
2010-08-06 09:23:47 +00:00
|
|
|
return (devfs_allocv(de, ap->a_dvp->v_mount, LK_EXCLUSIVE, ap->a_vpp));
|
2000-08-20 21:34:39 +00:00
|
|
|
}
|
|
|
|
|
2008-01-07 20:05:19 +00:00
|
|
|
static int
|
|
|
|
devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (vnops.fo_truncate(fp, length, cred, td));
|
|
|
|
}
|
|
|
|
|
2004-11-08 10:46:47 +00:00
|
|
|
static int
|
2013-10-15 06:28:11 +00:00
|
|
|
devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred,
|
|
|
|
int flags, struct thread *td)
|
2004-11-08 10:46:47 +00:00
|
|
|
{
|
|
|
|
struct cdev *dev;
|
2012-02-21 01:05:12 +00:00
|
|
|
int error, ioflag, ref;
|
|
|
|
ssize_t resid;
|
2004-11-08 10:46:47 +00:00
|
|
|
struct cdevsw *dsw;
|
2008-09-26 14:50:49 +00:00
|
|
|
struct file *fpop;
|
2004-11-08 10:46:47 +00:00
|
|
|
|
2013-10-15 06:33:10 +00:00
|
|
|
if (uio->uio_resid > DEVFS_IOSIZE_MAX)
|
|
|
|
return (EINVAL);
|
2008-09-26 14:50:49 +00:00
|
|
|
fpop = td->td_fpop;
|
2010-08-06 09:42:15 +00:00
|
|
|
error = devfs_fp_check(fp, &dev, &dsw, &ref);
|
2014-10-15 13:16:51 +00:00
|
|
|
if (error != 0) {
|
|
|
|
error = vnops.fo_write(fp, uio, cred, flags, td);
|
2004-11-13 23:21:54 +00:00
|
|
|
return (error);
|
2014-10-15 13:16:51 +00:00
|
|
|
}
|
2004-11-08 10:46:47 +00:00
|
|
|
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
|
2004-12-22 17:05:44 +00:00
|
|
|
ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
|
|
|
|
if (ioflag & O_DIRECT)
|
2004-11-08 10:46:47 +00:00
|
|
|
ioflag |= IO_DIRECT;
|
2012-07-02 21:01:03 +00:00
|
|
|
foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
|
2004-11-08 10:46:47 +00:00
|
|
|
|
|
|
|
resid = uio->uio_resid;
|
|
|
|
|
|
|
|
error = dsw->d_write(dev, uio, ioflag);
|
2015-04-01 08:25:40 +00:00
|
|
|
if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
|
|
|
|
devfs_timestamp(&dev->si_ctime);
|
2004-11-08 10:46:47 +00:00
|
|
|
dev->si_mtime = dev->si_ctime;
|
|
|
|
}
|
2008-09-26 14:50:49 +00:00
|
|
|
td->td_fpop = fpop;
|
2010-08-06 09:42:15 +00:00
|
|
|
dev_relthread(dev, ref);
|
2004-11-08 10:46:47 +00:00
|
|
|
|
2012-07-02 21:01:03 +00:00
|
|
|
foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
|
2004-11-08 10:46:47 +00:00
|
|
|
return (error);
|
|
|
|
}
|
2004-10-22 09:59:37 +00:00
|
|
|
|
Add a new file operations hook for mmap operations. File type-specific
logic is now placed in the mmap hook implementation rather than requiring
it to be placed in sys/vm/vm_mmap.c. This hook allows new file types to
support mmap() as well as potentially allowing mmap() for existing file
types that do not currently support any mapping.
The vm_mmap() function is now split up into two functions. A new
vm_mmap_object() function handles the "back half" of vm_mmap() and accepts
a referenced VM object to map rather than a (handle, handle_type) tuple.
vm_mmap() is now reduced to converting a (handle, handle_type) tuple to a
VM object and then calling vm_mmap_object() to handle the actual mapping.
The vm_mmap() function remains for use by other parts of the kernel
(e.g. device drivers and exec) but now only supports mapping vnodes,
character devices, and anonymous memory.
The mmap() system call invokes vm_mmap_object() directly with a NULL object
for anonymous mappings. For mappings using a file descriptor, the
descriptor's fo_mmap() hook is invoked instead. The fo_mmap() hook is
responsible for performing type-specific checks and adjustments to
arguments as well as possibly modifying mapping parameters such as flags
or the object offset. The fo_mmap() hook routines then call
vm_mmap_object() to handle the actual mapping.
The fo_mmap() hook is optional. If it is not set, then fo_mmap() will
fail with ENODEV. A fo_mmap() hook is implemented for regular files,
character devices, and shared memory objects (created via shm_open()).
While here, consistently use the VM_PROT_* constants for the vm_prot_t
type for the 'prot' variable passed to vm_mmap() and vm_mmap_object()
as well as the vm_mmap_vnode() and vm_mmap_cdev() helper routines.
Previously some places were using the mmap()-specific PROT_* constants
instead. While this happens to work because PROT_xx == VM_PROT_xx,
using VM_PROT_* is more correct.
Differential Revision: https://reviews.freebsd.org/D2658
Reviewed by: alc (glanced over), kib
MFC after: 1 month
Sponsored by: Chelsio
2015-06-04 19:41:15 +00:00
|
|
|
static int
|
|
|
|
devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
|
|
|
|
vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
|
|
|
|
struct thread *td)
|
|
|
|
{
|
|
|
|
struct cdev *dev;
|
|
|
|
struct cdevsw *dsw;
|
|
|
|
struct mount *mp;
|
|
|
|
struct vnode *vp;
|
|
|
|
struct file *fpop;
|
|
|
|
vm_object_t object;
|
|
|
|
vm_prot_t maxprot;
|
|
|
|
int error, ref;
|
|
|
|
|
|
|
|
vp = fp->f_vnode;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ensure that file and memory protections are
|
|
|
|
* compatible.
|
|
|
|
*/
|
|
|
|
mp = vp->v_mount;
|
|
|
|
if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0)
|
|
|
|
maxprot = VM_PROT_NONE;
|
|
|
|
else
|
|
|
|
maxprot = VM_PROT_EXECUTE;
|
|
|
|
if ((fp->f_flag & FREAD) != 0)
|
|
|
|
maxprot |= VM_PROT_READ;
|
|
|
|
else if ((prot & VM_PROT_READ) != 0)
|
|
|
|
return (EACCES);
|
|
|
|
|
|
|
|
/*
|
2015-08-06 16:50:37 +00:00
|
|
|
* If we are sharing potential changes via MAP_SHARED and we
|
|
|
|
* are trying to get write permission although we opened it
|
|
|
|
* without asking for it, bail out.
|
|
|
|
*
|
|
|
|
* Note that most character devices always share mappings.
|
|
|
|
* The one exception is that D_MMAP_ANON devices
|
|
|
|
* (i.e. /dev/zero) permit private writable mappings.
|
|
|
|
*
|
|
|
|
* Rely on vm_mmap_cdev() to fail invalid MAP_PRIVATE requests
|
|
|
|
* as well as updating maxprot to permit writing for
|
|
|
|
* D_MMAP_ANON devices rather than doing that here.
|
Add a new file operations hook for mmap operations. File type-specific
logic is now placed in the mmap hook implementation rather than requiring
it to be placed in sys/vm/vm_mmap.c. This hook allows new file types to
support mmap() as well as potentially allowing mmap() for existing file
types that do not currently support any mapping.
The vm_mmap() function is now split up into two functions. A new
vm_mmap_object() function handles the "back half" of vm_mmap() and accepts
a referenced VM object to map rather than a (handle, handle_type) tuple.
vm_mmap() is now reduced to converting a (handle, handle_type) tuple to a
a VM object and then calling vm_mmap_object() to handle the actual mapping.
The vm_mmap() function remains for use by other parts of the kernel
(e.g. device drivers and exec) but now only supports mapping vnodes,
character devices, and anonymous memory.
The mmap() system call invokes vm_mmap_object() directly with a NULL object
for anonymous mappings. For mappings using a file descriptor, the
descriptors fo_mmap() hook is invoked instead. The fo_mmap() hook is
responsible for performing type-specific checks and adjustments to
arguments as well as possibly modifying mapping parameters such as flags
or the object offset. The fo_mmap() hook routines then call
vm_mmap_object() to handle the actual mapping.
The fo_mmap() hook is optional. If it is not set, then fo_mmap() will
fail with ENODEV. A fo_mmap() hook is implemented for regular files,
character devices, and shared memory objects (created via shm_open()).
While here, consistently use the VM_PROT_* constants for the vm_prot_t
type for the 'prot' variable passed to vm_mmap() and vm_mmap_object()
as well as the vm_mmap_vnode() and vm_mmap_cdev() helper routines.
Previously some places were using the mmap()-specific PROT_* constants
instead. While this happens to work because PROT_xx == VM_PROT_xx,
using VM_PROT_* is more correct.
Differential Revision: https://reviews.freebsd.org/D2658
Reviewed by: alc (glanced over), kib
MFC after: 1 month
Sponsored by: Chelsio
2015-06-04 19:41:15 +00:00
|
|
|
*/
|
2015-08-06 16:50:37 +00:00
|
|
|
if ((flags & MAP_SHARED) != 0) {
|
|
|
|
if ((fp->f_flag & FWRITE) != 0)
|
|
|
|
maxprot |= VM_PROT_WRITE;
|
|
|
|
else if ((prot & VM_PROT_WRITE) != 0)
|
|
|
|
return (EACCES);
|
|
|
|
}
|
Add a new file operations hook for mmap operations. File type-specific
logic is now placed in the mmap hook implementation rather than requiring
it to be placed in sys/vm/vm_mmap.c. This hook allows new file types to
support mmap() as well as potentially allowing mmap() for existing file
types that do not currently support any mapping.
The vm_mmap() function is now split up into two functions. A new
vm_mmap_object() function handles the "back half" of vm_mmap() and accepts
a referenced VM object to map rather than a (handle, handle_type) tuple.
vm_mmap() is now reduced to converting a (handle, handle_type) tuple to a
a VM object and then calling vm_mmap_object() to handle the actual mapping.
The vm_mmap() function remains for use by other parts of the kernel
(e.g. device drivers and exec) but now only supports mapping vnodes,
character devices, and anonymous memory.
The mmap() system call invokes vm_mmap_object() directly with a NULL object
for anonymous mappings. For mappings using a file descriptor, the
descriptors fo_mmap() hook is invoked instead. The fo_mmap() hook is
responsible for performing type-specific checks and adjustments to
arguments as well as possibly modifying mapping parameters such as flags
or the object offset. The fo_mmap() hook routines then call
vm_mmap_object() to handle the actual mapping.
The fo_mmap() hook is optional. If it is not set, then fo_mmap() will
fail with ENODEV. A fo_mmap() hook is implemented for regular files,
character devices, and shared memory objects (created via shm_open()).
While here, consistently use the VM_PROT_* constants for the vm_prot_t
type for the 'prot' variable passed to vm_mmap() and vm_mmap_object()
as well as the vm_mmap_vnode() and vm_mmap_cdev() helper routines.
Previously some places were using the mmap()-specific PROT_* constants
instead. While this happens to work because PROT_xx == VM_PROT_xx,
using VM_PROT_* is more correct.
Differential Revision: https://reviews.freebsd.org/D2658
Reviewed by: alc (glanced over), kib
MFC after: 1 month
Sponsored by: Chelsio
2015-06-04 19:41:15 +00:00
|
|
|
maxprot &= cap_maxprot;
|
|
|
|
|
|
|
|
fpop = td->td_fpop;
|
|
|
|
error = devfs_fp_check(fp, &dev, &dsw, &ref);
|
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff,
|
|
|
|
&object);
|
|
|
|
td->td_fpop = fpop;
|
|
|
|
dev_relthread(dev, ref);
|
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
|
|
|
|
foff, FALSE, td);
|
|
|
|
if (error != 0)
|
|
|
|
vm_object_deallocate(object);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2005-03-10 18:21:34 +00:00
|
|
|
dev_t
|
|
|
|
dev2udev(struct cdev *x)
|
|
|
|
{
|
|
|
|
if (x == NULL)
|
|
|
|
return (NODEV);
|
2008-06-16 17:34:59 +00:00
|
|
|
return (cdev2priv(x)->cdp_inode);
|
2005-03-10 18:21:34 +00:00
|
|
|
}
|
|
|
|
|
2005-09-12 08:03:15 +00:00
|
|
|
static struct fileops devfs_ops_f = {
|
|
|
|
.fo_read = devfs_read_f,
|
|
|
|
.fo_write = devfs_write_f,
|
2008-01-07 20:05:19 +00:00
|
|
|
.fo_truncate = devfs_truncate_f,
|
2005-09-12 08:03:15 +00:00
|
|
|
.fo_ioctl = devfs_ioctl_f,
|
|
|
|
.fo_poll = devfs_poll_f,
|
|
|
|
.fo_kqfilter = devfs_kqfilter_f,
|
|
|
|
.fo_stat = devfs_stat_f,
|
|
|
|
.fo_close = devfs_close_f,
|
2011-08-16 20:07:47 +00:00
|
|
|
.fo_chmod = vn_chmod,
|
|
|
|
.fo_chown = vn_chown,
|
2013-08-16 14:22:20 +00:00
|
|
|
.fo_sendfile = vn_sendfile,
|
2013-08-21 17:36:01 +00:00
|
|
|
.fo_seek = vn_seek,
|
2014-09-22 16:20:47 +00:00
|
|
|
.fo_fill_kinfo = vn_fill_kinfo,
|
Add a new file operations hook for mmap operations. File type-specific
logic is now placed in the mmap hook implementation rather than requiring
it to be placed in sys/vm/vm_mmap.c. This hook allows new file types to
support mmap() as well as potentially allowing mmap() for existing file
types that do not currently support any mapping.
The vm_mmap() function is now split up into two functions. A new
vm_mmap_object() function handles the "back half" of vm_mmap() and accepts
a referenced VM object to map rather than a (handle, handle_type) tuple.
vm_mmap() is now reduced to converting a (handle, handle_type) tuple to a
a VM object and then calling vm_mmap_object() to handle the actual mapping.
The vm_mmap() function remains for use by other parts of the kernel
(e.g. device drivers and exec) but now only supports mapping vnodes,
character devices, and anonymous memory.
The mmap() system call invokes vm_mmap_object() directly with a NULL object
for anonymous mappings. For mappings using a file descriptor, the
descriptors fo_mmap() hook is invoked instead. The fo_mmap() hook is
responsible for performing type-specific checks and adjustments to
arguments as well as possibly modifying mapping parameters such as flags
or the object offset. The fo_mmap() hook routines then call
vm_mmap_object() to handle the actual mapping.
The fo_mmap() hook is optional. If it is not set, then fo_mmap() will
fail with ENODEV. A fo_mmap() hook is implemented for regular files,
character devices, and shared memory objects (created via shm_open()).
While here, consistently use the VM_PROT_* constants for the vm_prot_t
type for the 'prot' variable passed to vm_mmap() and vm_mmap_object()
as well as the vm_mmap_vnode() and vm_mmap_cdev() helper routines.
Previously some places were using the mmap()-specific PROT_* constants
instead. While this happens to work because PROT_xx == VM_PROT_xx,
using VM_PROT_* is more correct.
Differential Revision: https://reviews.freebsd.org/D2658
Reviewed by: alc (glanced over), kib
MFC after: 1 month
Sponsored by: Chelsio
2015-06-04 19:41:15 +00:00
|
|
|
.fo_mmap = devfs_mmap_f,
|
2005-09-12 08:03:15 +00:00
|
|
|
.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct vop_vector devfs_vnodeops = {
|
|
|
|
.vop_default = &default_vnodeops,
|
|
|
|
|
|
|
|
.vop_access = devfs_access,
|
|
|
|
.vop_getattr = devfs_getattr,
|
|
|
|
.vop_ioctl = devfs_rioctl,
|
|
|
|
.vop_lookup = devfs_lookup,
|
|
|
|
.vop_mknod = devfs_mknod,
|
|
|
|
.vop_pathconf = devfs_pathconf,
|
|
|
|
.vop_read = devfs_rread,
|
|
|
|
.vop_readdir = devfs_readdir,
|
|
|
|
.vop_readlink = devfs_readlink,
|
|
|
|
.vop_reclaim = devfs_reclaim,
|
|
|
|
.vop_remove = devfs_remove,
|
|
|
|
.vop_revoke = devfs_revoke,
|
|
|
|
.vop_setattr = devfs_setattr,
|
|
|
|
#ifdef MAC
|
|
|
|
.vop_setlabel = devfs_setlabel,
|
|
|
|
#endif
|
|
|
|
.vop_symlink = devfs_symlink,
|
2008-12-12 01:00:38 +00:00
|
|
|
.vop_vptocnp = devfs_vptocnp,
|
2005-09-12 08:03:15 +00:00
|
|
|
};
|
|
|
|
|
2015-02-27 16:43:50 +00:00
|
|
|
struct vop_vector devfs_specops = {
|
2005-09-12 08:03:15 +00:00
|
|
|
.vop_default = &default_vnodeops,
|
|
|
|
|
|
|
|
.vop_access = devfs_access,
|
|
|
|
.vop_bmap = VOP_PANIC,
|
|
|
|
.vop_close = devfs_close,
|
|
|
|
.vop_create = VOP_PANIC,
|
|
|
|
.vop_fsync = devfs_fsync,
|
|
|
|
.vop_getattr = devfs_getattr,
|
|
|
|
.vop_link = VOP_PANIC,
|
|
|
|
.vop_mkdir = VOP_PANIC,
|
|
|
|
.vop_mknod = VOP_PANIC,
|
|
|
|
.vop_open = devfs_open,
|
|
|
|
.vop_pathconf = devfs_pathconf,
|
2014-10-15 13:16:51 +00:00
|
|
|
.vop_poll = dead_poll,
|
2005-09-12 08:03:15 +00:00
|
|
|
.vop_print = devfs_print,
|
2014-10-15 13:16:51 +00:00
|
|
|
.vop_read = dead_read,
|
2005-09-12 08:03:15 +00:00
|
|
|
.vop_readdir = VOP_PANIC,
|
|
|
|
.vop_readlink = VOP_PANIC,
|
|
|
|
.vop_reallocblks = VOP_PANIC,
|
|
|
|
.vop_reclaim = devfs_reclaim,
|
|
|
|
.vop_remove = devfs_remove,
|
|
|
|
.vop_rename = VOP_PANIC,
|
|
|
|
.vop_revoke = devfs_revoke,
|
|
|
|
.vop_rmdir = VOP_PANIC,
|
|
|
|
.vop_setattr = devfs_setattr,
|
|
|
|
#ifdef MAC
|
|
|
|
.vop_setlabel = devfs_setlabel,
|
|
|
|
#endif
|
|
|
|
.vop_strategy = VOP_PANIC,
|
|
|
|
.vop_symlink = VOP_PANIC,
|
2008-12-12 01:00:38 +00:00
|
|
|
.vop_vptocnp = devfs_vptocnp,
|
2014-10-15 13:16:51 +00:00
|
|
|
.vop_write = dead_write,
|
2005-09-12 08:03:15 +00:00
|
|
|
};
|
|
|
|
|
2004-12-22 16:25:50 +00:00
|
|
|
/*
|
|
|
|
* Our calling convention to the device drivers used to be that we passed
|
|
|
|
* vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_
|
|
|
|
* flags instead since that's what open(), close() and ioctl() takes and
|
|
|
|
* we don't really want vnode.h in device drivers.
|
|
|
|
* We solved the source compatibility by redefining some vnode flags to
|
|
|
|
* be the same as the fcntl ones and by sending down the bitwise OR of
|
|
|
|
* the respective fcntl/vnode flags. These CTASSERTS make sure nobody
|
|
|
|
* pulls the rug out under this.
|
|
|
|
*/
|
|
|
|
CTASSERT(O_NONBLOCK == IO_NDELAY);
|
|
|
|
CTASSERT(O_FSYNC == IO_SYNC);
|