freebsd-dev/sys/miscfs/devfs/devfs_vnops.c
Poul-Henning Kamp 75c1354190 This Implements the mumbled about "Jail" feature.
This is a seriously beefed up chroot kind of thing.  The process
is jailed along the same lines as a chroot does it, but with
additional tough restrictions imposed on what the superuser can do.

For all I know, it is safe to hand over the root bit inside a
prison to the customer living in that prison, this is what
it was developed for in fact:  "real virtual servers".

Each prison has an ip number associated with it, which all IP
communications will be coerced to use and each prison has its own
hostname.

Needless to say, you need more RAM this way, but the advantage is
that each customer can run their own particular version of apache
and not stomp on the toes of their neighbors.

It generally does what one would expect, but setting up a jail
still takes a little knowledge.

A few notes:

   I have no scripts for setting up a jail, don't ask me for them.

   The IP number should be an alias on one of the interfaces.

   mount a /proc in each jail, it will make ps more useable.

   /proc/<pid>/status tells the hostname of the prison for
   jailed processes.

   Quotas are only sensible if you have a mountpoint per prison.

   There are no provisions for stopping resource-hogging.

   Some "#ifdef INET" and similar may be missing (send patches!)

If somebody wants to take it from here and develop it into
more of a "virtual machine" they should be most welcome!

Tools, comments, patches & documentation most welcome.

Have fun...

Sponsored by:   http://www.rndassociates.com/
Run for almost a year by:       http://www.servetheweb.com/
1999-04-28 11:38:52 +00:00

2137 lines
52 KiB
C

/*
* Copyright 1997,1998 Julian Elischer. All rights reserved.
* julian@freebsd.org
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: devfs_vnops.c,v 1.71 1999/04/27 11:16:31 phk Exp $
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/namei.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/conf.h>
#include <sys/disklabel.h>
#include <sys/lock.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <miscfs/specfs/specdev.h>/* definitions of spec functions we use */
#include <sys/dirent.h>
#include <miscfs/devfs/devfsdefs.h>
#include <sys/vmmeter.h>
#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_extern.h>
/*
* Insert description here
*/
/*
* Convert a component of a pathname into a pointer to a locked node.
* This is a very central and rather complicated routine.
* If the file system is not maintained in a strict tree hierarchy,
* this can result in a deadlock situation (see comments in code below).
*
* The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
* whether the name is to be looked up, created, renamed, or deleted.
* When CREATE, RENAME, or DELETE is specified, information usable in
* creating, renaming, or deleting a directory entry may be calculated.
* If flag has LOCKPARENT or'ed into it and the target of the pathname
* exists, lookup returns both the target and its parent directory locked.
* When creating or renaming and LOCKPARENT is specified, the target may
* not be ".". When deleting and LOCKPARENT is specified, the target may
* be ".", but the caller must check to ensure it does a vrele and DNUNLOCK
* instead of two DNUNLOCKs.
*
* Overall outline of devfs_lookup:
*
* check accessibility of directory
* null terminate the component (lookup leaves the whole string alone)
* look for name in cache, if found, then if at end of path
* and deleting or creating, drop it, else return name
* search for name in directory, to found or notfound
* notfound:
* if creating, return locked directory,
* else return error
* found:
* if at end of path and deleting, return information to allow delete
* if at end of path and rewriting (RENAME and LOCKPARENT), lock target
* node and return info to allow rewrite
* if not at end, add name to cache; if at end and neither creating
* nor deleting, add name to cache
* On return to lookup, remove the null termination we put in at the start.
*
* NOTE: (LOOKUP | LOCKPARENT) currently returns the parent node unlocked.
*/
/*
 * devfs_lookup: look up the name in ap->a_cnp inside directory ap->a_dvp.
 *
 * Return values as implemented below:
 *   EINVAL       devfs_vntodn() could not map the directory vnode to a node
 *   ENOTDIR      the directory node is not of type DEV_DIR
 *   (error)      whatever VOP_ACCESS() returned when a permission check failed
 *   ENOENT       name not found and we are not creating/renaming the last
 *                component
 *   EJUSTRETURN  name not found, but the caller may create it
 *   EISDIR       RENAME (with parent wanted) whose target is "."
 *   0            name found; *ap->a_vpp has been set
 *
 * NOTE(review): the return value of devfs_dntovn() is ignored at every call
 * site in this function, whereas devfs_symlink() in this file checks it.
 * Confirm whether a failure here can leave *a_vpp NULL while 0 is returned.
 */
static int
devfs_lookup(struct vop_lookup_args *ap)
/*struct vop_lookup_args {
struct vnode * a_dvp; directory vnode ptr
struct vnode ** a_vpp; where to put the result
struct componentname * a_cnp; the name we want
};*/
{
struct componentname *cnp = ap->a_cnp;
struct vnode *dir_vnode = ap->a_dvp;
struct vnode **result_vnode = ap->a_vpp;
dn_p dir_node; /* the directory we are searching */
dn_p new_node; /* the node we are searching for */
devnm_p new_nodename;
int flags = cnp->cn_flags;
int op = cnp->cn_nameiop; /* LOOKUP, CREATE, RENAME, or DELETE */
int lockparent = flags & LOCKPARENT;
int wantparent = flags & (LOCKPARENT|WANTPARENT);
int error = 0;
struct proc *p = cnp->cn_proc;
char heldchar; /* the char at the end of the name component */
*result_vnode = NULL; /* safe not sorry */ /*XXX*/
DBPRINT(("lookup\n"));
/* Diagnostic only: a zero use count is reported but not treated as fatal. */
if (dir_vnode->v_usecount == 0)
printf("dir had no refs ");
if (devfs_vntodn(dir_vnode,&dir_node))
{
printf("vnode has changed?\n");
vprint("=",dir_vnode);
return(EINVAL);
}
/*
* Check accessibility of directory: it must be a directory node and
* the caller needs search (VEXEC) permission on it.
*/
if (dir_node->type != DEV_DIR) /* XXX or symlink? */
{
return (ENOTDIR);
}
if ((error = VOP_ACCESS(dir_vnode, VEXEC, cnp->cn_cred, p)) != 0)
{
return (error);
}
/*
* We now have a segment name to search for, and a directory to search.
*
*/
/***********************************************************************\
* SEARCH FOR NAME *
* while making sure the component is null terminated for the strcmp *
\***********************************************************************/
/*
* Temporarily overwrite the byte following the component with '\0',
* search, then restore the saved byte so the path buffer is unchanged.
*/
heldchar = cnp->cn_nameptr[cnp->cn_namelen];
cnp->cn_nameptr[cnp->cn_namelen] = '\0';
new_nodename = dev_findname(dir_node,cnp->cn_nameptr);
cnp->cn_nameptr[cnp->cn_namelen] = heldchar;
if(!new_nodename) {
/*******************************************************\
* Failed to find it.. (That may be good) *
\*******************************************************/
new_node = NULL; /* to be safe */
/*
* If creating, and at end of pathname
* then can consider
* allowing file to be created.
*/
if (!(flags & ISLASTCN) || !(op == CREATE || op == RENAME)) {
return ENOENT;
}
/*
* Access for write is interpreted as allowing
* creation of files in the directory.
*/
if ((error = VOP_ACCESS(dir_vnode, VWRITE,
cnp->cn_cred, p)) != 0)
{
DBPRINT(("MKACCESS "));
return (error);
}
/*
* We return with the directory locked, so that
* the parameters we set up above will still be
* valid if we actually decide to add a new entry.
* We return ni_vp == NULL to indicate that the entry
* does not currently exist; we leave a pointer to
* the (locked) directory vnode in namei_data->ni_dvp.
* The pathname buffer is saved so that the name
* can be obtained later.
*
* NB - if the directory is unlocked, then this
* information cannot be used.
*/
cnp->cn_flags |= SAVENAME; /*XXX why? */
/* The directory stays locked only if the caller asked for LOCKPARENT. */
if (!lockparent)
VOP_UNLOCK(dir_vnode, 0, p);
return (EJUSTRETURN);
}
/***************************************************************\
* Found it.. this is not always a good thing.. *
\***************************************************************/
new_node = new_nodename->dnp;
/*
* Remember which name entry led to this node; devfs_remove() and
* devfs_rename() read last_lookup back to find the entry to free.
*/
new_node->last_lookup = new_nodename; /* for unlink */
/*
* If deleting, and at end of pathname, return
* parameters which can be used to remove file.
* If the wantparent flag isn't set, we return only
* the directory (in namei_data->ni_dvp), otherwise we go
* on and lock the node, being careful with ".".
*/
if (op == DELETE && (flags & ISLASTCN)) {
/*
* Write access to directory required to delete files.
*/
if ((error = VOP_ACCESS(dir_vnode, VWRITE,
cnp->cn_cred, p)) != 0)
return (error);
/*
* we are trying to delete '.'. What does this mean? XXX
* Hand back the directory itself with an extra reference.
*/
if (dir_node == new_node) {
VREF(dir_vnode);
*result_vnode = dir_vnode;
return (0);
}
/*
* If directory is "sticky", then user must own
* the directory, or the file in it, else she
* may not delete it (unless she's root). This
* implements append-only directories.
* (The check itself is compiled out unless NOTYET is defined.)
*/
devfs_dntovn(new_node,result_vnode);
#ifdef NOTYET
if ((dir_node->mode & ISVTX) &&
cnp->cn_cred->cr_uid != 0 &&
cnp->cn_cred->cr_uid != dir_node->uid &&
cnp->cn_cred->cr_uid != new_node->uid) {
VOP_UNLOCK(*result_vnode, 0, p);
return (EPERM);
}
#endif
if (!lockparent)
VOP_UNLOCK(dir_vnode, 0, p);
return (0);
}
/*
* If rewriting (RENAME), return the vnode and the
* information required to rewrite the present directory
* Must get node of directory entry to verify it's a
* regular file, or empty directory.
*/
if (op == RENAME && wantparent && (flags & ISLASTCN)) {
/*
* Are we allowed to change the holding directory?
*/
if ((error = VOP_ACCESS(dir_vnode, VWRITE,
cnp->cn_cred, p)) != 0)
return (error);
/*
* Careful about locking second node.
* This can only occur if the target is ".".
*/
if (dir_node == new_node)
return (EISDIR);
devfs_dntovn(new_node,result_vnode);
/* hmm save the 'from' name (we need to delete it) */
cnp->cn_flags |= SAVENAME;
if (!lockparent)
VOP_UNLOCK(dir_vnode, 0, p);
return (0);
}
/*
* Step through the translation in the name. We do not unlock the
* directory because we may need it again if a symbolic link
* is relative to the current directory. Instead we save it
* unlocked as "saved_dir_node" XXX. We must get the target
* node before unlocking
* the directory to ensure that the node will not be removed
* before we get it. We prevent deadlock by always fetching
* nodes from the root, moving down the directory tree. Thus
* when following backward pointers ".." we must unlock the
* parent directory before getting the requested directory.
* There is a potential race condition here if both the current
* and parent directories are removed before the lock for the
* node associated with ".." returns. We hope that this occurs
* infrequently since we cannot avoid this race condition without
* implementing a sophisticated deadlock detection algorithm.
* Note also that this simple deadlock detection scheme will not
* work if the file system has any hard links other than ".."
* that point backwards in the directory structure.
*/
if (flags & ISDOTDOT) {
VOP_UNLOCK(dir_vnode, 0, p); /* race to get the node */
devfs_dntovn(new_node,result_vnode);
/* Re-take the directory lock only for the final component with LOCKPARENT. */
if (lockparent && (flags & ISLASTCN))
vn_lock(dir_vnode, LK_EXCLUSIVE | LK_RETRY, p);
} else if (dir_node == new_node) {
VREF(dir_vnode); /* we want ourself, ie "." */
*result_vnode = dir_vnode;
} else {
devfs_dntovn(new_node,result_vnode);
/*
* NOTE(review): this unlocks the parent whenever this is the last
* component, even if LOCKPARENT was requested (see the NOTE in the
* header comment about LOOKUP|LOCKPARENT) - confirm this is intended.
*/
if (!lockparent || (flags & ISLASTCN))
VOP_UNLOCK(dir_vnode, 0, p);
}
DBPRINT(("GOT\n"));
return (0);
}
/*
*/
/*
 * devfs_access: decide whether the credential in ap->a_cred may access
 * the node behind ap->a_vp in the way described by ap->a_mode.
 *
 * ap->a_mode carries VREAD/VWRITE/VEXEC in the owner position (the Y in
 * 0Y00).  Exactly one permission class is consulted: owner, else group,
 * else other.  The requested bits are shifted right by three for each
 * class we fall through.
 *
 * Returns 0 when access is granted, EACCES when it is refused, or the
 * error from devfs_vntodn() if the vnode cannot be mapped to a node.
 */
static int
devfs_access(struct vop_access_args *ap)
{
	struct ucred *cred = ap->a_cred;
	int wanted = ap->a_mode;
	dn_p node;
	int rc;
	int idx;
	int in_group;

	DBPRINT(("access\n"));
	rc = devfs_vntodn(ap->a_vp, &node);
	if (rc != 0) {
		printf("devfs_vntodn returned %d ",rc);
		return rc;
	}

	/* No process context means the kernel itself is asking: always allow. */
	if (ap->a_p == NULL)
		return 0;

	if (cred->cr_uid != node->uid) {
		/* Not the owner: move the request down to the group bits. */
		wanted >>= 3;
		in_group = 0;
		for (idx = 0; idx < cred->cr_ngroups; idx++) {
			if (cred->cr_groups[idx] == node->gid) {
				in_group = 1;
				break;
			}
		}
		/* Not in the group either: fall back to the "other" bits. */
		if (!in_group)
			wanted >>= 3;
	}

	if ((node->mode & wanted) == wanted)
		return (0);

	/*
	 * The mode bits refused us.  Only now consult suser_xxx(), so that
	 * superuser privilege is used strictly as a last resort.
	 */
	if (suser_xxx(cred, ap->a_p, 0) == 0)
		return 0;
	return (EACCES);
}
static int
devfs_getattr(struct vop_getattr_args *ap)
/*struct vop_getattr_args {
struct vnode *a_vp;
struct vattr *a_vap;
struct ucred *a_cred;
struct proc *a_p;
} */
{
struct vnode *vp = ap->a_vp;
struct vattr *vap = ap->a_vap;
dn_p file_node;
int error;
DBPRINT(("getattr\n"));
if ((error = devfs_vntodn(vp,&file_node)) != 0)
{
printf("devfs_vntodn returned %d ",error);
return error;
}
vap->va_rdev = 0;/* default value only */
vap->va_mode = file_node->mode;
switch (file_node->type)
{
case DEV_DIR:
vap->va_rdev = (dev_t)file_node->dvm;
vap->va_mode |= (S_IFDIR);
break;
case DEV_CDEV:
vap->va_rdev = file_node->by.Cdev.dev;
vap->va_mode |= (S_IFCHR);
break;
case DEV_BDEV:
vap->va_rdev = file_node->by.Bdev.dev;
vap->va_mode |= (S_IFBLK);
break;
case DEV_SLNK:
break;
}
vap->va_type = vp->v_type;
vap->va_nlink = file_node->links;
vap->va_uid = file_node->uid;
vap->va_gid = file_node->gid;
vap->va_fsid = (intptr_t)(void *)file_node->dvm;
vap->va_fileid = (intptr_t)(void *)file_node;
vap->va_size = file_node->len; /* now a u_quad_t */
vap->va_blocksize = 512;
/*
* XXX If the node times are in Jan 1, 1970, then
* update them to the boot time.
* When we made the node, the date/time was not yet known.
*/
if(file_node->ctime.tv_sec < (24 * 3600))
{
TIMEVAL_TO_TIMESPEC(&boottime,&(file_node->ctime));
TIMEVAL_TO_TIMESPEC(&boottime,&(file_node->mtime));
TIMEVAL_TO_TIMESPEC(&boottime,&(file_node->atime));
}
if (file_node->flags & IN_ACCESS) {
nanotime(&file_node->atime);
file_node->flags &= ~IN_ACCESS;
}
vap->va_ctime = file_node->ctime;
vap->va_mtime = file_node->mtime;
vap->va_atime = file_node->atime;
vap->va_gen = 0;
vap->va_flags = 0;
vap->va_bytes = file_node->len; /* u_quad_t */
vap->va_filerev = 0; /* XXX */ /* u_quad_t */
vap->va_vaflags = 0; /* XXX */
return 0;
}
static int
devfs_setattr(struct vop_setattr_args *ap)
/*struct vop_setattr_args {
struct vnode *a_vp;
struct vattr *a_vap;
struct ucred *a_cred;
struct proc *a_p;
} */
{
struct vnode *vp = ap->a_vp;
struct vattr *vap = ap->a_vap;
struct ucred *cred = ap->a_cred;
struct proc *p = ap->a_p;
int error = 0;
gid_t *gp;
int i;
dn_p file_node;
if (vap->va_flags != VNOVAL) /* XXX needs to be implemented */
return (EOPNOTSUPP);
if ((error = devfs_vntodn(vp,&file_node)) != 0)
{
printf("devfs_vntodn returned %d ",error);
return error;
}
DBPRINT(("setattr\n"));
if ((vap->va_type != VNON) ||
(vap->va_nlink != VNOVAL) ||
(vap->va_fsid != VNOVAL) ||
(vap->va_fileid != VNOVAL) ||
(vap->va_blocksize != VNOVAL) ||
(vap->va_rdev != VNOVAL) ||
(vap->va_bytes != VNOVAL) ||
(vap->va_gen != VNOVAL ))
{
return EINVAL;
}
/*
* Anyone can touch the files in such a way that the times are set
* to NOW (e.g. run 'touch') if they have write permissions
* however only the owner or root can set "un-natural times.
* They also don't need write permissions.
*/
if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
#if 0 /*
* This next test is pointless under devfs for now..
* as there is only one devfs hiding under potentially many
* mountpoints and actual device node are really 'mounted' under
* a FAKE mountpoint inside the kernel only, no matter where it
* APPEARS they are mounted to the outside world..
* A readonly devfs doesn't exist anyway.
*/
if (vp->v_mount->mnt_flag & MNT_RDONLY)
return (EROFS);
#endif
if (((vap->va_vaflags & VA_UTIMES_NULL) == 0) &&
(cred->cr_uid != file_node->uid) &&
suser_xxx(cred, p, 0))
return (EPERM);
if(VOP_ACCESS(vp, VWRITE, cred, p))
return (EACCES);
file_node->atime = vap->va_atime;
file_node->mtime = vap->va_mtime;
nanotime(&file_node->ctime);
return (0);
}
/*
* Change the permissions.. must be root or owner to do this.
*/
if (vap->va_mode != (u_short)VNOVAL) {
if ((cred->cr_uid != file_node->uid)
&& suser_xxx(cred, p, 0))
return (EPERM);
/* set drwxwxrwx stuff */
file_node->mode &= ~07777;
file_node->mode |= vap->va_mode & 07777;
}
/*
* Change the owner.. must be root to do this.
*/
if (vap->va_uid != (uid_t)VNOVAL) {
if (suser_xxx(cred, p, 0))
return (EPERM);
file_node->uid = vap->va_uid;
}
/*
* Change the group.. must be root or owner to do this.
* If we are the owner, we must be in the target group too.
* don't use suser_xxx() unless you have to as it reports
* whether you needed suser_xxx powers or not.
*/
if (vap->va_gid != (gid_t)VNOVAL) {
if (cred->cr_uid == file_node->uid){
gp = cred->cr_groups;
for (i = 0; i < cred->cr_ngroups; i++, gp++) {
if (vap->va_gid == *gp)
goto cando;
}
}
/*
* we can't do it with normal privs,
* do we have an ace up our sleeve?
*/
if( suser_xxx(cred, p, 0))
return (EPERM);
cando:
file_node->gid = vap->va_gid;
}
#if 0
/*
* Copied from somewhere else
* but only kept as a marker and reminder of the fact that
* flags should be handled some day
*/
if (vap->va_flags != VNOVAL) {
if (error = suser_xxx(cred, p, 0))
return error;
if (cred->cr_uid == 0)
;
else {
}
}
#endif
return error;
}
static int
devfs_xread(struct vop_read_args *ap)
/*struct vop_read_args {
struct vnode *a_vp;
struct uio *a_uio;
int a_ioflag;
struct ucred *a_cred;
} */
{
int error = 0;
dn_p file_node;
DBPRINT(("read\n"));
if ((error = devfs_vntodn(ap->a_vp,&file_node)) != 0)
{
printf("devfs_vntodn returned %d ",error);
return error;
}
switch (ap->a_vp->v_type) {
case VREG:
return(EINVAL);
case VDIR:
return VOP_READDIR(ap->a_vp,ap->a_uio,ap->a_cred,
NULL,NULL,NULL);
case VCHR:
case VBLK:
panic("devfs: vnode methods");
default:
panic("devfs_read(): bad file type");
break;
}
}
/*
* Write data to a file or directory.
*/
static int
devfs_xwrite(struct vop_write_args *ap)
/*struct vop_write_args {
struct vnode *a_vp;
struct uio *a_uio;
int a_ioflag;
struct ucred *a_cred;
} */
{
switch (ap->a_vp->v_type) {
case VREG:
return(EINVAL);
case VDIR:
return(EISDIR);
case VCHR:
case VBLK:
panic("devfs: vnode methods");
default:
panic("devfs_write(): bad file type");
}
}
static int
devfs_remove(struct vop_remove_args *ap)
/*struct vop_remove_args {
struct vnode *a_dvp;
struct vnode *a_vp;
struct componentname *a_cnp;
} */
{
struct vnode *vp = ap->a_vp;
struct vnode *dvp = ap->a_dvp;
struct componentname *cnp = ap->a_cnp;
dn_p tp, tdp;
devnm_p tnp;
int doingdirectory = 0;
int error = 0;
uid_t ouruid = cnp->cn_cred->cr_uid;
DBPRINT(("remove\n"));
/*
* Lock our directories and get our name pointers
* assume that the names are null terminated as they
* are the end of the path. Get pointers to all our
* devfs structures.
*/
if ((error = devfs_vntodn(dvp, &tdp)) != 0) {
abortit:
VOP_ABORTOP(dvp, cnp);
return (error);
}
if ((error = devfs_vntodn(vp, &tp)) != 0) goto abortit;
/*
* Assuming we are atomic, dev_lookup left this for us
*/
tnp = tp->last_lookup;
/*
* Check we are doing legal things WRT the new flags
*/
if ((tp->flags & (IMMUTABLE | APPEND))
|| (tdp->flags & APPEND) /*XXX eh?*/ ) {
error = EPERM;
goto abortit;
}
/*
* Make sure that we don't try do something stupid
*/
if ((tp->type) == DEV_DIR) {
/*
* Avoid ".", "..", and aliases of "." for obvious reasons.
*/
if ( (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
|| (cnp->cn_flags&ISDOTDOT) ) {
error = EINVAL;
goto abortit;
}
doingdirectory++;
}
/***********************************
* Start actually doing things.... *
***********************************/
getnanotime(&(tdp->mtime));
/*
* own the parent directory, or the destination of the rename,
* otherwise the destination may not be changed (except by
* root). This implements append-only directories.
* XXX shoudn't this be in generic code?
*/
if ((tdp->mode & S_ISTXT)
&& ouruid != 0
&& ouruid != tdp->uid
&& ouruid != tp->uid ) {
error = EPERM;
goto abortit;
}
/*
* Target must be empty if a directory and have no links
* to it. Also, ensure source and target are compatible
* (both directories, or both not directories).
*/
if (( doingdirectory) && (tp->links > 2)) {
printf("nlink = %d\n",tp->links); /*XXX*/
error = ENOTEMPTY;
goto abortit;
}
dev_free_name(tnp);
tp = NULL;
return (error);
}
/*
*/
/*
 * devfs_link: create a new name ap->a_cnp in directory ap->a_tdvp that
 * refers to the existing node behind ap->a_vp.
 *
 * Returns:
 *   ENAMETOOLONG  name is longer than DEVMAXNAMESIZE
 *   EXDEV         either vnode is not a devfs vnode
 *   EPERM         source node is IMMUTABLE or APPEND
 *   otherwise     the result of dev_add_name(), or the error from
 *                 devfs_vntodn()
 * Every failure detected here calls VOP_ABORTOP() on the target
 * directory; a failure inside dev_add_name() does not.
 */
static int
devfs_link(struct vop_link_args *ap)
{
	struct vnode *src_vp = ap->a_vp;
	struct vnode *dir_vp = ap->a_tdvp;
	struct componentname *cnp = ap->a_cnp;
	dn_p src_node, dir_node;
	devnm_p new_name;
	int rc = 0;

	DBPRINT(("link\n"));

	/* First catch an arbitrary restriction for this FS. */
	if (cnp->cn_namelen > DEVMAXNAMESIZE) {
		rc = ENAMETOOLONG;
		goto abortit;
	}

	/* Map both vnodes to their devfs nodes. */
	rc = devfs_vntodn(dir_vp, &dir_node);
	if (rc != 0)
		goto abortit;
	rc = devfs_vntodn(src_vp, &src_node);
	if (rc != 0)
		goto abortit;

	/* Trying to link across filesystems? Both ends must be devfs. */
	if ((src_vp->v_tag != VT_DEVFS) || (src_vp->v_tag != dir_vp->v_tag)) {
		rc = EXDEV;
		goto abortit;
	}

	/* Check we are doing legal things WRT the new flags. */
	if (src_node->flags & (IMMUTABLE | APPEND)) {
		rc = EPERM;
		goto abortit;
	}

	/***********************************
	 * Start actually doing things.... *
	 ***********************************/
	getnanotime(&(dir_node->atime));
	return (dev_add_name(cnp->cn_nameptr, dir_node, NULL, src_node,
	    &new_name));

abortit:
	VOP_ABORTOP(dir_vp, cnp);
	return (rc);
}
/*
* Rename system call. Seems overly complicated to me...
* rename("foo", "bar");
* is essentially
* unlink("bar");
* link("foo", "bar");
* unlink("foo");
* but ``atomically''.
*
* When the target exists, both the directory
* and target vnodes are locked.
* the source and source-parent vnodes are referenced
*
*
* Basic algorithm is:
*
* 1) Bump link count on source while we're linking it to the
* target. This also ensure the inode won't be deleted out
* from underneath us while we work (it may be truncated by
* a concurrent `trunc' or `open' for creation).
* 2) Link source to destination. If destination already exists,
* delete it first.
* 3) Unlink source reference to node if still around. If a
* directory was moved and the parent of the destination
* is different from the source, patch the ".." entry in the
* directory.
*/
/*
 * devfs_rename: move the name fcnp (node fvp in directory fdvp) to the
 * name tcnp in directory tdvp, replacing tvp if it exists.
 *
 * The name entries to manipulate are taken from each node's last_lookup
 * field, which devfs_lookup() fills in.
 *
 * Error returns produced below: ENAMETOOLONG, EXDEV, EPERM, EINVAL,
 * ENOTEMPTY, or whatever devfs_vntodn() reports.
 *
 * NOTE(review): several things in this routine look suspicious and should
 * be confirmed against the vnode reference/lock protocol before changing:
 *  - the VOP_ACCESS() result in the cross-directory check is stored in
 *    'error' but never tested, so the rename proceeds regardless;
 *  - the "goto out" in that same check reaches "fp->links--" although
 *    "fp->links++" has not happened yet, and 'out' releases only fvp;
 *  - on the success path 'tp' is always NULL by the time "if (tp)" is
 *    tested, so that vput() never runs, and tvp/fdvp are not released;
 *  - the return value of dev_add_name() is ignored.
 */
static int
devfs_rename(struct vop_rename_args *ap)
/*struct vop_rename_args {
struct vnode *a_fdvp;
struct vnode *a_fvp;
struct componentname *a_fcnp;
struct vnode *a_tdvp;
struct vnode *a_tvp;
struct componentname *a_tcnp;
} */
{
struct vnode *tvp = ap->a_tvp;
struct vnode *tdvp = ap->a_tdvp;
struct vnode *fvp = ap->a_fvp;
struct vnode *fdvp = ap->a_fdvp;
struct componentname *tcnp = ap->a_tcnp;
struct componentname *fcnp = ap->a_fcnp;
struct proc *p = fcnp->cn_proc;
dn_p fp, fdp, tp, tdp; /* from node, from dir, target node, target dir */
devnm_p fnp,tnp; /* name entries for the source and the target */
int doingdirectory = 0;
int error = 0;
/*
* First catch an arbitrary restriction for this FS
*/
if(tcnp->cn_namelen > DEVMAXNAMESIZE) {
error = ENAMETOOLONG;
goto abortit;
}
/*
* Lock our directories and get our name pointers
* assume that the names are null terminated as they
* are the end of the path. Get pointers to all our
* devfs structures.
*/
if ((error = devfs_vntodn(tdvp,&tdp)) != 0) goto abortit;
if ((error = devfs_vntodn(fdvp,&fdp)) != 0) goto abortit;
if ((error = devfs_vntodn(fvp,&fp)) != 0) goto abortit;
fnp = fp->last_lookup;
if (tvp) {
if ((error = devfs_vntodn(tvp,&tp)) != 0) goto abortit;
tnp = tp->last_lookup;
} else {
/* No existing target: nothing to replace. */
tp = NULL;
tnp = NULL;
}
/*
* trying to move it out of devfs? (v_tag == VT_DEVFS)
* if we move a dir across mnt points. we need to fix all
* the mountpoint pointers! XXX
* so for now keep dirs within the same mount
*/
if ( (fvp->v_tag != VT_DEVFS)
|| (fvp->v_tag != tdvp->v_tag)
|| (tvp && (fvp->v_tag != tvp->v_tag))
|| ((fp->type == DEV_DIR) && (fp->dvm != tdp->dvm ))) {
error = EXDEV;
/*
* Common failure exit, also jumped to from outside this if-block:
* abort both pending operations and drop every vnode passed in.
*/
abortit:
VOP_ABORTOP(tdvp, tcnp);
if (tdvp == tvp) /* eh? */
vrele(tdvp);
else
vput(tdvp);
if (tvp)
vput(tvp);
VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
vrele(fdvp);
vrele(fvp);
return (error);
}
/*
* Check we are doing legal things WRT the new flags
*/
if ((tp && (tp->flags & (IMMUTABLE | APPEND)))
|| (fp->flags & (IMMUTABLE | APPEND))
|| (fdp->flags & APPEND)) {
error = EPERM;
goto abortit;
}
/*
* Make sure that we don't try do something stupid
*/
if ((fp->type) == DEV_DIR) {
/*
* Avoid ".", "..", and aliases of "." for obvious reasons.
*/
if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.')
|| (fcnp->cn_flags&ISDOTDOT)
|| (tcnp->cn_namelen == 1 && tcnp->cn_nameptr[0] == '.')
|| (tcnp->cn_flags&ISDOTDOT)
|| (tdp == fp )) {
error = EINVAL;
goto abortit;
}
doingdirectory++;
}
/*
* If ".." must be changed (ie the directory gets a new
* parent) then the source directory must not be in the
* directory hierarchy above the target, as this would
* orphan everything below the source directory. Also
* the user must have write permission in the source so
* as to be able to change "..".
*/
if (doingdirectory && (tdp != fdp)) {
dn_p tmp,ntmp;
/* NOTE(review): this result is never tested - see header comment. */
error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc);
/*
* Walk from the target directory up through the parent pointers;
* the walk stops at the node whose parent is itself.
*/
tmp = tdp;
do {
if(tmp == fp) {
/* XXX unlock stuff here probably */
error = EINVAL;
goto out;
}
ntmp = tmp;
} while ((tmp = tmp->by.Dir.parent) != ntmp);
}
/***********************************
* Start actually doing things.... *
***********************************/
getnanotime(&(fp->atime));
/*
* Check if just deleting a link name.
* (source and target are the same vnode)
*/
if (fvp == tvp) {
if (fvp->v_type == VDIR) {
error = EINVAL;
goto abortit;
}
/* Release destination completely. */
VOP_ABORTOP(tdvp, tcnp);
vput(tdvp);
vput(tvp);
/* Delete source. */
VOP_ABORTOP(fdvp, fcnp); /*XXX*/
vrele(fdvp);
vrele(fvp);
dev_free_name(fnp);
return 0;
}
/*
* 1) Bump link count while we're moving stuff
* around. If we crash somewhere before
* completing our work, too bad :)
*/
fp->links++;
/*
* If the target exists zap it (unless it's a non-empty directory)
* We could do that as well but won't
*/
if (tp) {
int ouruid = tcnp->cn_cred->cr_uid;
/*
* If the parent directory is "sticky", then the user must
* own the parent directory, or the destination of the rename,
* otherwise the destination may not be changed (except by
* root). This implements append-only directories.
* XXX shouldn't this be in generic code?
*/
if ((tdp->mode & S_ISTXT)
&& ouruid != 0
&& ouruid != tdp->uid
&& ouruid != tp->uid ) {
error = EPERM;
goto bad;
}
/*
* Target must be empty if a directory and have no links
* to it. Also, ensure source and target are compatible
* (both directories, or both not directories).
*/
if (( doingdirectory) && (tp->links > 2)) {
printf("nlink = %d\n",tp->links); /*XXX*/
error = ENOTEMPTY;
goto bad;
}
dev_free_name(tnp);
tp = NULL;
}
/*
* 2) Add the new name for the source node in the target directory,
* then 3) detach and free the old name.
* NOTE(review): the dev_add_name() result is not checked.
*/
dev_add_name(tcnp->cn_nameptr,tdp,fnp->as.front.realthing,fp,&tnp);
fnp->dnp = NULL;
fp->links--; /* one less link to it.. */
dev_free_name(fnp);
fp->links--; /* we added one earlier*/
if (tdp)
vput(tdvp);
/* NOTE(review): tp is always NULL here, so this vput() never runs. */
if (tp)
vput(fvp);
vrele(ap->a_fvp);
return (error);
/* Failure after the link count was bumped: release the target side. */
bad:
if (tp)
vput(tvp);
vput(tdvp);
/* Undo the temporary link-count bump (if fvp can be locked) and drop fvp. */
out:
if (vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p) == 0) {
fp->links--; /* we added one earlier*/
vput(fvp);
} else
vrele(fvp);
return (error);
}
static int
devfs_symlink(struct vop_symlink_args *ap)
/*struct vop_symlink_args {
struct vnode *a_dvp;
struct vnode **a_vpp;
struct componentname *a_cnp;
struct vattr *a_vap;
char *a_target;
} */
{
struct vnode *vp;
int error;
dn_p dnp;
union typeinfo by;
devnm_p nm_p;
DBPRINT(("symlink\n"));
if((error = devfs_vntodn(ap->a_dvp, &dnp)) != 0) {
return (error);
}
by.Slnk.name = ap->a_target;
by.Slnk.namelen = strlen(ap->a_target);
dev_add_entry(ap->a_cnp->cn_nameptr, dnp, DEV_SLNK, &by,
NULL, NULL, &nm_p);
if((error = devfs_dntovn(nm_p->dnp, &vp)) != 0) {
return (error);
}
VOP_SETATTR(vp, ap->a_vap, ap->a_cnp->cn_cred, ap->a_cnp->cn_proc);
*ap->a_vpp = NULL;
vput(vp);
return 0;
}
/*
* Vnode op for readdir
*/
/*
 * devfs_readdir: copy directory entries of ap->a_vp into ap->a_uio.
 *
 * The listing is synthesised on every call: entry 0 is ".", entry 1 is
 * "..", followed by the names on the directory node's dirlist.  Entries
 * are walked from the beginning and skipped until the running byte
 * position reaches the offset the caller asked for.  On return
 * uio_offset holds the position reached.
 *
 * The eofflag/ncookies/cookies arguments are not used.
 *
 * Returns 0, ENOTDIR if the node is not a directory, or the error from
 * devfs_vntodn() or uiomove().
 */
static int
devfs_readdir(struct vop_readdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct dirent dirent;
	dn_p dir_node;
	devnm_p name_node;
	char *name;
	int error = 0;
	int reclen;
	int nodenumber;
	int startpos,pos;

	DBPRINT(("readdir\n"));

	/* set up refs to dir */
	if ((error = devfs_vntodn(vp,&dir_node)) != 0)
		return error;
	if (dir_node->type != DEV_DIR)
		return(ENOTDIR);

	pos = 0;
	startpos = uio->uio_offset;
	name_node = dir_node->by.Dir.dirlist;
	nodenumber = 0;
	getnanotime(&(dir_node->atime));

	while ((name_node || (nodenumber < 2)) && (uio->uio_resid > 0)) {
		/*
		 * Start from an all-zero record so that padding and the
		 * bytes between the end of the name and d_reclen never
		 * carry stale stack contents out through uiomove().
		 */
		bzero(&dirent, sizeof(dirent));

		switch (nodenumber) {
		case 0:
			/* "." - the directory itself */
			dirent.d_fileno = (uintptr_t)(void *)dir_node;
			name = ".";
			dirent.d_namlen = 1;
			dirent.d_type = DT_DIR;
			break;
		case 1:
			/* ".." - the parent, or ourselves if there is none */
			if (dir_node->by.Dir.parent)
				dirent.d_fileno
				    = (uintptr_t)(void *)dir_node->by.Dir.parent;
			else
				dirent.d_fileno = (uintptr_t)(void *)dir_node;
			name = "..";
			dirent.d_namlen = 2;
			dirent.d_type = DT_DIR;
			break;
		default:
			/* a real entry from the directory's name list */
			dirent.d_fileno = (uintptr_t)(void *)name_node->dnp;
			dirent.d_namlen = strlen(name_node->name);
			name = name_node->name;
			switch (name_node->dnp->type) {
			case DEV_BDEV:
				dirent.d_type = DT_BLK;
				break;
			case DEV_CDEV:
				dirent.d_type = DT_CHR;
				break;
			case DEV_DDEV:
				dirent.d_type = DT_SOCK; /*XXX*/
				break;
			case DEV_DIR:
				dirent.d_type = DT_DIR;
				break;
			case DEV_SLNK:
				dirent.d_type = DT_LNK;
				break;
			default:
				dirent.d_type = DT_UNKNOWN;
			}
		}

		reclen = dirent.d_reclen = GENERIC_DIRSIZ(&dirent);

		if (pos >= startpos) {	/* made it to the offset yet? */
			if (uio->uio_resid < reclen)	/* will it fit? */
				break;
			strcpy(dirent.d_name, name);
			if ((error = uiomove((caddr_t)&dirent,
			    dirent.d_reclen, uio)) != 0)
				break;
		}
		pos += reclen;

		/* Only advance along the list once "." and ".." are done. */
		if ((nodenumber > 1) && name_node)
			name_node = name_node->next;
		nodenumber++;
	}

	uio->uio_offset = pos;
	return (error);
}
/*
*/
/*
 * devfs_readlink: copy the target string of the symlink ap->a_vp into
 * ap->a_uio.
 *
 * Returns 0 on success, EINVAL if the node is not a symlink, or the
 * error from devfs_vntodn(), VOP_ACCESS() or uiomove().
 *
 * XXX VOP_ACCESS() is called with a NULL process pointer.
 * NOTE(review): devfs_access() in this file grants access whenever the
 * process pointer is NULL, so this check presumably always succeeds -
 * confirm whether that is intended.
 */
static int
devfs_readlink(struct vop_readlink_args *ap)
{
	struct vnode *link_vp = ap->a_vp;
	dn_p link_node;
	int rc;

	DBPRINT(("readlink\n"));

	rc = devfs_vntodn(link_vp, &link_node);
	if (rc != 0)
		return rc;

	if (link_node->type != DEV_SLNK)
		return(EINVAL);

	rc = VOP_ACCESS(link_vp, VREAD, ap->a_cred, NULL); /* XXX */
	if (rc != 0)
		return rc;

	return uiomove(link_node->by.Slnk.name, link_node->by.Slnk.namelen,
	    ap->a_uio);
}
#ifdef notyet
/*
 * devfs_abortop: abandon a pending name operation.
 *
 * Frees the pathname buffer back to namei_zone when the component name
 * has a buffer (HASBUF) that nobody asked to keep (SAVESTART clear).
 * Always returns 0.  Only compiled when "notyet" is defined.
 */
static int
devfs_abortop(struct vop_abortop_args *ap)
{
	struct componentname *cnp = ap->a_cnp;

	DBPRINT(("abortop\n"));
	if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
		zfree(namei_zone, cnp->cn_pnbuf);
	return 0;
}
#endif /* notyet */
static int
devfs_reclaim(struct vop_reclaim_args *ap)
/*struct vop_reclaim_args {
struct vnode *a_vp;
} */
{
dn_p file_node = NULL;
int error;
DBPRINT(("reclaim\n"));
if ((error = devfs_vntodn(ap->a_vp,&file_node)) != 0)
{
printf("devfs_vntodn returned %d ",error);
return error;
}
ap->a_vp->v_data = NULL;
if (file_node) {
file_node->vn = 0;
file_node->vn_id = 0;
}
return(0);
}
/*
 * VOP_PRINT for devfs: emit a fixed one-line tag identifying the vnode
 * as a devfs vnode.  The vnode passed in is not examined.  Returns 0.
 */
static int
devfs_print(struct vop_print_args *ap)
	/*struct vop_print_args {
		struct vnode *a_vp;
	} */
{

	printf("tag VT_DEVFS, devfs vnode\n");
	return 0;
}
/**************************************************************************\
* pseudo ops *
\**************************************************************************/
/*
 * Detach a devfs node from the vnode that currently represents it.
 *
 * If the node still refers to a vnode, and that vnode is really ours
 * (its v_id matches the id cached in the node and its v_data points back
 * at this node), the vnode is revoked with VOP_REVOKE(REVOKEALL).  The
 * node's vnode pointer is cleared whether or not a revoke happened.
 *
 * With PARANOID defined a NULL dnp is reported and ignored instead of
 * being dereferenced.
 */
/*proto*/
void
devfs_dropvnode(dn_p dnp)
{
	struct vnode *vn_p;

#ifdef PARANOID
	if(!dnp)
	{
		printf("devfs: dn count dropped too early\n");
		return;		/* nothing to detach; do not touch a NULL node */
	}
#endif
	vn_p = dnp->vn;
	/*
	 * check if we have a vnode.......
	 * (and that it has not been recycled for something else meanwhile)
	 */
	if((vn_p) && ( dnp->vn_id == vn_p->v_id) && (dnp == (dn_p)vn_p->v_data))
	{
		VOP_REVOKE(vn_p, REVOKEALL);
	}
	dnp->vn = NULL; /* be pedantic about this */
}
/* struct vnode *speclisth[SPECHSZ];*/ /* till specfs goes away */
/*
 * Open a special file.
 *
 * Character devices are opened with the vnode unlocked (the lock is
 * dropped before the driver's d_open and retaken afterwards); block
 * devices are opened with the lock held.  Any other vnode type is a
 * no-op.  Returns the devfs_vntodn() error or the driver's result.
 *
	struct vop_open_args {
		struct vnode *a_vp;
		int  a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} *ap;
 */
/* ARGSUSED */
static int
devfs_open(struct vop_open_args *ap)
{
	struct vnode *devvp = ap->a_vp;
	struct proc *opener = ap->a_p;
	dn_p node;
	int rc;

	rc = devfs_vntodn(devvp, &node);
	if (rc != 0)
		return rc;

	if (devvp->v_type == VCHR) {
		VOP_UNLOCK(devvp, 0, opener);
		rc = (*node->by.Cdev.cdevsw->d_open)(node->by.Cdev.dev,
		    ap->a_mode, S_IFCHR, opener);
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, opener);
	} else if (devvp->v_type == VBLK) {
		rc = (*node->by.Bdev.bdevsw->d_open)(node->by.Bdev.dev,
		    ap->a_mode, S_IFBLK, opener);
	}
	/* rc is still 0 here for anything that is not a device */
	return (rc);
}
/*
 * Vnode op for read on a devfs device node.
 *
 * Character devices: the vnode lock is dropped around the driver's
 * d_read entry point and retaken afterwards.
 * Block devices: data is pulled through the buffer cache in bsize
 * chunks, with one block of read-ahead when this request follows on
 * from the last block read (v_lastr).
 *
 * A zero-length request returns 0 without calling the driver.  Returns
 * EINVAL for a negative offset on a block device, otherwise the error
 * from devfs_vntodn(), the driver, bread()/breadn() or uiomove().
 * Panics on any vnode type other than VCHR or VBLK.
 *
	struct vop_read_args {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	}
 */
/* ARGSUSED */
static int
devfs_read( struct vop_read_args *ap)
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn, nextbn;
	long bsize, bscale;
	struct partinfo dpart;
	int n, on;
	int rasize;
	d_ioctl_t *ioctl;
	int error = 0;
	dev_t dev;
	dn_p dnp;

	if ((error = devfs_vntodn(vp,&dnp)) != 0)
		return error;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("devfs_read mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("devfs_read proc");
#endif
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp, 0, p);
		error = (*dnp->by.Cdev.cdevsw->d_read)
				(dnp->by.Cdev.dev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		break;

	case VBLK:
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;
		dev = dnp->by.Bdev.dev;
		/*
		 * This is a hack!
		 * If the driver can describe the partition and it holds a
		 * BSD FFS, use that filesystem's block size
		 * (frag count * fragment size) instead of BLKDEV_IOSIZE.
		 */
		if ( (ioctl = dnp->by.Bdev.bdevsw->d_ioctl) != NULL &&
		    (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
		    dpart.part->p_fstype == FS_BSDFFS &&
		    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
			bsize = dpart.part->p_frag * dpart.part->p_fsize;
		bscale = btodb(bsize);
		/*
		 * Get buffers with this data from the buffer cache.
		 * If it's not there the strategy() entrypoint will be called.
		 * We may do this in several chunks.
		 */
		do {
			bn = btodb(uio->uio_offset) & ~(bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			if (vp->v_lastr + bscale == bn) {
				nextbn = bn + bscale;
				/*
				 * breadn() wants an array of int sizes.
				 * bsize is a long, so hand it a real int
				 * rather than a cast pointer to the long.
				 */
				rasize = (int)bsize;
				error = breadn(vp, bn, (int)bsize, &nextbn,
					&rasize, 1, NOCRED, &bp);
			} else
				error = bread(vp, bn, (int)bsize, NOCRED, &bp);
			vp->v_lastr = bn;
			/* never hand out more than the device delivered */
			n = min(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp);
				return (error);
			}
			/*
			 * Copy it to the user's space
			 */
			error = uiomove((char *)bp->b_data + on, n, uio);
			brelse(bp);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		break;

	default:
		panic("devfs_read type");
	}
	/* note the access unless the mount asked for no atime updates */
	if (!(vp->v_mount->mnt_flag & MNT_NOATIME))
		dnp->flags |= IN_ACCESS;
	return (error);
}
/*
* Vnode op for write
struct vop_write_args {
struct vnode *a_vp;
struct uio *a_uio;
int a_ioflag;
struct ucred *a_cred;
}
*/
/* ARGSUSED */
static int
devfs_write( struct vop_write_args *ap)
{
register struct vnode *vp = ap->a_vp;
register struct uio *uio = ap->a_uio;
struct proc *p = uio->uio_procp;
struct buf *bp;
daddr_t bn;
int bsize, blkmask;
struct partinfo dpart;
register int n, on;
int error = 0;
dn_p dnp;
if ((error = devfs_vntodn(vp,&dnp)) != 0)
return error;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_WRITE)
panic("devfs_write mode");
if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
panic("devfs_write proc");
#endif
switch (vp->v_type) {
case VCHR:
VOP_UNLOCK(vp, 0, p);
error = (*dnp->by.Cdev.cdevsw->d_write)
(dnp->by.Cdev.dev, uio, ap->a_ioflag);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
return (error);
case VBLK:
if (uio->uio_resid == 0)
return (0);
if (uio->uio_offset < 0)
return (EINVAL);
bsize = BLKDEV_IOSIZE;
if ((dnp->by.Bdev.bdevsw->d_ioctl != NULL)
&& ((*dnp->by.Bdev.bdevsw->d_ioctl)(dnp->by.Bdev.dev, DIOCGPART,
(caddr_t)&dpart, FREAD, p) == 0)
&& (dpart.part->p_fstype == FS_BSDFFS)
&& (dpart.part->p_frag != 0)
&& (dpart.part->p_fsize != 0)) {
bsize = dpart.part->p_frag * dpart.part->p_fsize;
}
blkmask = btodb(bsize) - 1;
do {
bn = btodb(uio->uio_offset) & ~blkmask;
on = uio->uio_offset % bsize;
n = min((unsigned)(bsize - on), uio->uio_resid);
if (n == bsize)
bp = getblk(vp, bn, bsize, 0, 0);
else
error = bread(vp, bn, bsize, NOCRED, &bp);
if (error) {
brelse(bp);
return (error);
}
n = min(n, bsize - bp->b_resid);
error = uiomove((char *)bp->b_data + on, n, uio);
if (n + on == bsize)
bawrite(bp);
else
bdwrite(bp);
} while (error == 0 && uio->uio_resid > 0 && n != 0);
return (error);
default:
panic("devfs_write type");
}
/* NOTREACHED */
}
/*
* Device ioctl operation.
struct vop_ioctl_args {
struct vnode *a_vp;
int a_command;
caddr_t a_data;
int a_fflag;
struct ucred *a_cred;
struct proc *a_p;
}
*/
/* ARGSUSED */
static int
devfs_ioctl(struct vop_ioctl_args *ap)
{
dn_p dnp;
int error;
if ((error = devfs_vntodn(ap->a_vp,&dnp)) != 0)
return error;
switch (ap->a_vp->v_type) {
case VCHR:
return ((*dnp->by.Cdev.cdevsw->d_ioctl)(dnp->by.Cdev.dev,
ap->a_command,
ap->a_data,
ap->a_fflag,
ap->a_p));
case VBLK:
return ((*dnp->by.Bdev.bdevsw->d_ioctl)(dnp->by.Bdev.dev,
ap->a_command,
ap->a_data,
ap->a_fflag,
ap->a_p));
default:
panic("devfs_ioctl");
/* NOTREACHED */
}
}
/*
struct vop_poll_args {
struct vnode *a_vp;
int a_events;
struct ucred *a_cred;
struct proc *a_p;
} *ap;
*/
/* ARGSUSED */
static int
devfs_poll(struct vop_poll_args *ap)
{
dn_p dnp;
int error;
if ((error = devfs_vntodn(ap->a_vp,&dnp)) != 0)
return error;
switch (ap->a_vp->v_type) {
case VCHR:
return (*dnp->by.Cdev.cdevsw->d_poll)(dnp->by.Cdev.dev,
ap->a_events,
ap->a_p);
default:
return (vop_defaultop((struct vop_generic_args *)ap));
}
}
/*
 * Synch buffers associated with a block device
 *
 * Character devices return 0 immediately.  For any other vnode type
 * every buffer on vp->v_dirtyblkhd that is not already B_BUSY is started
 * on its way to the device; when a_waitfor is MNT_WAIT the routine then
 * sleeps until vp->v_numoutput has dropped to zero.
 *
 * Returns the devfs_vntodn() error if the vnode cannot be mapped to a
 * devfs node, otherwise 0.  Panics if a buffer on the dirty list is not
 * marked B_DELWRI.
 *
struct vop_fsync_args {
struct vnode *a_vp;
struct ucred *a_cred;
int a_waitfor;
struct proc *a_p;
}
*/
/* ARGSUSED */
static int
devfs_fsync(struct vop_fsync_args *ap)
{
register struct vnode *vp = ap->a_vp;
register struct buf *bp;
struct buf *nbp;
int s;
dn_p dnp;
int error;
/* dnp is not used further down; the lookup only validates the vnode */
if ((error = devfs_vntodn(vp,&dnp)) != 0)
return error;
if (vp->v_type == VCHR)
return (0);
/*
* Flush all dirty buffers associated with a block device.
*/
loop:
/* the dirty list is walked at splbio */
s = splbio();
for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
nbp = TAILQ_NEXT(bp, b_vnbufs);
/* skip buffers somebody else is already working on */
if ((bp->b_flags & B_BUSY))
continue;
if ((bp->b_flags & B_DELWRI) == 0)
panic("devfs_fsync: not dirty");
if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
vfs_bio_awrite(bp);
splx(s);
} else {
/* take the buffer ourselves, then write it asynchronously */
bremfree(bp);
bp->b_flags |= B_BUSY;
splx(s);
bawrite(bp);
}
/*
 * One write has been issued and the spl dropped; rescan from the
 * head of the list (presumably because it may have changed).
 */
goto loop;
}
if (ap->a_waitfor == MNT_WAIT) {
/* still at splbio here: wait for all outstanding writes to finish */
while (vp->v_numoutput) {
vp->v_flag |= VBWAIT;
(void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0);
}
#ifdef DIAGNOSTIC
/* buffers became dirty again while we slept: report and start over */
if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
vprint("devfs_fsync: dirty", vp);
splx(s);
goto loop;
}
#endif
}
splx(s);
return (0);
}
/*
 * Last reference to the vnode has gone away.  devfs keeps no per-vnode
 * state that needs tidying here, so the vnode is simply unlocked.
 * Always returns 0.
 *
 *	struct vop_inactive_args {
 *		struct vnode *a_vp;
 *		struct proc *a_p;
 *	}
 */
static int
devfs_inactive(struct vop_inactive_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct proc *p = ap->a_p;

	VOP_UNLOCK(vp, 0, p);
	return 0;
}
/*
* Just call the device strategy routine
struct vop_strategy_args {
struct vnode *a_vp;
struct buf *a_bp;
}
*/
static int
devfs_strategy(struct vop_strategy_args *ap)
{
struct buf *bp = ap->a_bp;
dn_p dnp;
int error;
if ((ap->a_vp->v_type != VCHR)
&& (ap->a_vp->v_type != VBLK))
panic ("devfs_strat:badvnode type");
if ((error = devfs_vntodn(ap->a_vp,&dnp)) != 0)
return error;
if (((bp->b_flags & B_READ) == 0) &&
(LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start)
(*bioops.io_start)(bp);
switch (ap->a_vp->v_type) {
case VCHR:
(*dnp->by.Cdev.cdevsw->d_strategy)(bp);
break;
case VBLK:
(*dnp->by.Bdev.bdevsw->d_strategy)(bp);
break;
default:
/* XXX set error code? */
break;
}
return (0);
}
/*
* This is a noop, simply returning what one has been given.
struct vop_bmap_args {
struct vnode *a_vp;
daddr_t a_bn;
struct vnode **a_vpp;
daddr_t *a_bnp;
int *a_runp;
int *a_runb;
}
*/
static int
devfs_bmap(struct vop_bmap_args *ap)
{
if (ap->a_vpp != NULL)
*ap->a_vpp = ap->a_vp;
if (ap->a_bnp != NULL)
*ap->a_bnp = ap->a_bn;
if (ap->a_runp != NULL)
*ap->a_runp = 0;
if (ap->a_runb != NULL)
*ap->a_runb = 0;
return (0);
}
/*
 * Device close routine
 *
 * The driver's d_close is only called on the last close of the device
 * (vcount() not greater than 1) or when the vnode is being forcibly
 * closed (VXLOCK set); earlier closes return 0 without calling the
 * driver.  For block devices the in-core buffers are flushed and
 * invalidated first.
 *
 * Returns the devfs_vntodn() or vinvalbuf() error, 0 for a non-final
 * close, otherwise the driver's d_close result.  Panics on any vnode
 * type other than VCHR or VBLK.
 *
struct vop_close_args {
struct vnode *a_vp;
int a_fflag;
struct ucred *a_cred;
struct proc *a_p;
}
*/
/* ARGSUSED */
static int
devfs_close(struct vop_close_args *ap)
{
register struct vnode *vp = ap->a_vp;
int error;
dn_p dnp;
if ((error = devfs_vntodn(vp,&dnp)) != 0)
return error;
switch (vp->v_type) {
case VCHR:
/*
* Hack: a tty device that is a controlling terminal
* has a reference from the session structure.
* We cannot easily tell that a character device is
* a controlling terminal, unless it is the closing
* process' controlling terminal. In that case,
* if the reference count is 2 (this last descriptor
* plus the session), release the reference from the session.
*/
if (vcount(vp) == 2 && ap->a_p &&
(vp->v_flag & VXLOCK) == 0 &&
vp == ap->a_p->p_session->s_ttyvp) {
vrele(vp);
ap->a_p->p_session->s_ttyvp = NULL;
}
/*
* If the vnode is locked, then we are in the midst
* of forcibly closing the device, otherwise we only
* close on last reference.
*/
if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
return (0);
return ((*dnp->by.Cdev.cdevsw->d_close)(dnp->by.Cdev.dev,
ap->a_fflag,
S_IFCHR,
ap->a_p));
/* NOT REACHED */
case VBLK:
/*
* On last close of a block device (that isn't mounted)
* we must invalidate any in core blocks, so that
* we can, for instance, change floppy disks.
*/
/* vinvalbuf() is called with the vnode locked; V_SAVE is passed */
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
VOP_UNLOCK(vp, 0, ap->a_p);
if (error)
return (error);
/*
* We do not want to really close the device if it
* is still in use unless we are trying to close it
* forcibly. Since every use (buffer, vnode, swap, cmap)
* holds a reference to the vnode, and because we mark
* any other vnodes that alias this device, when the
* sum of the reference counts on all the aliased
* vnodes descends to one, we are on last close.
*/
if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0)
return (0);
return ((*dnp->by.Bdev.bdevsw->d_close)(dnp->by.Bdev.dev,
ap->a_fflag,
S_IFBLK,
ap->a_p));
/* NOT REACHED */
default:
panic("devfs_close: not special");
}
}
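/*
 * Print out the contents of a special device vnode.
 * (No separate routine follows this comment: the device vnode table
 * uses devfs_print() for vop_print.)
	struct vop_print_args {
		struct vnode *a_vp;
	}
 */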
/*
 * Special device advisory byte-level locks.
 *
 * Locking is not provided on device nodes: requests with F_FLOCK set in
 * a_flags get EOPNOTSUPP, all others get EINVAL.
 *
	struct vop_advlock_args {
		struct vnode *a_vp;
		caddr_t  a_id;
		int  a_op;
		struct flock *a_fl;
		int  a_flags;
	}
 */
/* ARGSUSED */
static int
devfs_advlock(struct vop_advlock_args *ap)
{

	if ((ap->a_flags & F_FLOCK) != 0)
		return (EOPNOTSUPP);
	return (EINVAL);
}
/*
 * Special device bad operation
 *
 * Placed in the vnode operation tables for operations that must never
 * be invoked on the vnode in question; reaching it panics.
 */
static int
devfs_badop(void)
{

	panic("devfs_badop called");
	/* NOTREACHED */
}
/*
 * Completion callback installed in b_iodone by devfs_getpages(): flag
 * the buffer as done and wake whoever is sleeping on it.
 */
static void
devfs_getpages_iodone(struct buf *bp)
{

	bp->b_flags = bp->b_flags | B_DONE;
	wakeup(bp);
}
/*
 * VOP_GETPAGES for devfs device vnodes.
 *
 * Reads ap->a_count bytes (rounded up to a multiple of the block size)
 * from the device into the pages ap->a_m[] with one strategy call on a
 * pbuf, sleeping until the I/O completes.  Each page is then marked
 * fully valid, partially valid or invalid according to how much data
 * arrived.  Pages other than ap->a_reqpage are activated, deactivated
 * or freed here; the requested page is left alone.
 *
 * Returns VM_PAGER_OK if the requested page ended up with any valid
 * bits set, VM_PAGER_ERROR otherwise or when the starting offset is out
 * of range.
 */
static int
devfs_getpages(struct vop_getpages_args *ap)
{
vm_offset_t kva;
int error;
int i, pcount, size, s;
daddr_t blkno;
struct buf *bp;
vm_page_t m;
vm_ooffset_t offset;
int toff, nextoff, nread;
struct vnode *vp = ap->a_vp;
int blksiz;
int gotreqpage;
error = 0;
/* number of pages covered by the request */
pcount = round_page(ap->a_count) / PAGE_SIZE;
/*
* Calculate the offset of the transfer.
*/
offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;
/* XXX sanity check before we go into details. */
/* XXX limits should be defined elsewhere. */
#define DADDR_T_BIT 32
#define OFFSET_MAX ((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1)
if (offset < 0 || offset > OFFSET_MAX) {
/* XXX still no %q in kernel. */
printf("devfs_getpages: preposterous offset 0x%x%08x\n",
(u_int)((u_quad_t)offset >> 32),
(u_int)(offset & 0xffffffff));
return (VM_PAGER_ERROR);
}
/* byte offset -> device block number */
blkno = btodb(offset);
/*
* Round up physical size for real devices, use the
* fundamental blocksize of the fs if possible.
*/
if (vp && vp->v_mount) {
/* a non-block vnode is only reported, not rejected */
if (vp->v_type != VBLK) {
vprint("Non VBLK", vp);
}
blksiz = vp->v_mount->mnt_stat.f_bsize;
if (blksiz < DEV_BSIZE) {
blksiz = DEV_BSIZE;
}
}
else
blksiz = DEV_BSIZE;
/* round the byte count up (assumes blksiz is a power of two) */
size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);
bp = getpbuf(NULL);
kva = (vm_offset_t)bp->b_data;
/*
* Map the pages to be read into the kva.
*/
pmap_qenter(kva, ap->a_m, pcount);
/* Build a minimal buffer header. */
bp->b_flags = B_BUSY | B_READ | B_CALL;
bp->b_iodone = devfs_getpages_iodone;
/* B_PHYS is not set, but it is nice to fill this in. */
bp->b_proc = curproc;
/* the same credential is used for read and write; one hold for each */
bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
if (bp->b_rcred != NOCRED)
crhold(bp->b_rcred);
if (bp->b_wcred != NOCRED)
crhold(bp->b_wcred);
bp->b_blkno = blkno;
bp->b_lblkno = blkno;
pbgetvp(ap->a_vp, bp);
bp->b_bcount = size;
bp->b_bufsize = size;
bp->b_resid = 0;
/* paging statistics */
cnt.v_vnodein++;
cnt.v_vnodepgsin += pcount;
/* Do the input. */
VOP_STRATEGY(bp->b_vp, bp);
s = splbio();
/* We definitely need to be at splbio here. */
/* devfs_getpages_iodone() sets B_DONE and wakes us */
while ((bp->b_flags & B_DONE) == 0)
tsleep(bp, PVM, "spread", 0);
splx(s);
if ((bp->b_flags & B_ERROR) != 0) {
if (bp->b_error)
error = bp->b_error;
else
error = EIO;
}
/* bytes actually transferred; zero whatever part of the request is missing */
nread = size - bp->b_resid;
if (nread < ap->a_count) {
bzero((caddr_t)kva + nread,
ap->a_count - nread);
}
pmap_qremove(kva, pcount);
gotreqpage = 0;
/* walk the pages; toff/nextoff are the byte range [toff, nextoff) of page i */
for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
nextoff = toff + PAGE_SIZE;
m = ap->a_m[i];
m->flags &= ~PG_ZERO;
if (nextoff <= nread) {
/* page lies entirely inside the data read */
m->valid = VM_PAGE_BITS_ALL;
m->dirty = 0;
} else if (toff < nread) {
/* page only partly read: valid up to nread rounded up to DEV_BSIZE */
int nvalid = ((nread + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1);
vm_page_set_validclean(m, 0, nvalid);
} else {
/* nothing was read into this page */
m->valid = 0;
m->dirty = 0;
}
if (i != ap->a_reqpage) {
/*
* Just in case someone was asking for this page we
* now tell them that it is ok to use.
*/
if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
if (m->valid) {
if (m->flags & PG_WANTED) {
vm_page_activate(m);
} else {
vm_page_deactivate(m);
}
vm_page_wakeup(m);
} else {
vm_page_free(m);
}
} else {
vm_page_free(m);
}
} else if (m->valid) {
/* the page the caller asked for has at least some valid data */
gotreqpage = 1;
}
}
if (!gotreqpage) {
m = ap->a_m[ap->a_reqpage];
#ifndef MAX_PERF
printf("devfs_getpages: I/O read failure: (error code=%d)\n", error);
printf(" size: %d, resid: %ld, a_count: %d, valid: 0x%x\n",
size, bp->b_resid, ap->a_count, m->valid);
printf(" nread: %d, reqpage: %d, pindex: %d, pcount: %d\n",
nread, ap->a_reqpage, m->pindex, pcount);
#endif
/*
* Free the buffer header back to the swap buffer pool.
*/
relpbuf(bp, NULL);
return VM_PAGER_ERROR;
}
/*
* Free the buffer header back to the swap buffer pool.
*/
relpbuf(bp, NULL);
return VM_PAGER_OK;
}
/*
 * These are the operations used by directories etc in a devfs
 *
 * Anything not listed falls through to vop_defaultop.  bmap is wired to
 * devfs_badop, which panics.  Read and write use devfs_xread and
 * devfs_xwrite rather than the device read/write routines.
 */
vop_t **devfs_vnodeop_p;
static struct vnodeopv_entry_desc devfs_vnodeop_entries[] = {
{ &vop_default_desc, (vop_t *) vop_defaultop },
{ &vop_access_desc, (vop_t *) devfs_access },
{ &vop_bmap_desc, (vop_t *) devfs_badop },
{ &vop_getattr_desc, (vop_t *) devfs_getattr },
{ &vop_inactive_desc, (vop_t *) devfs_inactive },
{ &vop_link_desc, (vop_t *) devfs_link },
{ &vop_lookup_desc, (vop_t *) devfs_lookup },
{ &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
{ &vop_print_desc, (vop_t *) devfs_print },
{ &vop_read_desc, (vop_t *) devfs_xread },
{ &vop_readdir_desc, (vop_t *) devfs_readdir },
{ &vop_readlink_desc, (vop_t *) devfs_readlink },
{ &vop_reclaim_desc, (vop_t *) devfs_reclaim },
{ &vop_remove_desc, (vop_t *) devfs_remove },
{ &vop_rename_desc, (vop_t *) devfs_rename },
{ &vop_setattr_desc, (vop_t *) devfs_setattr },
{ &vop_symlink_desc, (vop_t *) devfs_symlink },
{ &vop_write_desc, (vop_t *) devfs_xwrite },
{ NULL, NULL }	/* table terminator */
};
/* ties the entry table to the devfs_vnodeop_p vector and registers it */
static struct vnodeopv_desc devfs_vnodeop_opv_desc =
{ &devfs_vnodeop_p, devfs_vnodeop_entries };
VNODEOP_SET(devfs_vnodeop_opv_desc);
/*
 * Operations vector for the device (special file) vnodes of a devfs.
 *
 * The device I/O entry points (open, close, read, write, ioctl, poll,
 * fsync, strategy, bmap, getpages, advlock) go to the devfs_* routines
 * that call into the driver.  Namespace operations that are wired to
 * devfs_badop panic if they are ever invoked.  Anything not listed falls
 * through to vop_defaultop.
 */
vop_t **devfs_spec_vnodeop_p;
static struct vnodeopv_entry_desc devfs_spec_vnodeop_entries[] = {
{ &vop_default_desc, (vop_t *) vop_defaultop },
{ &vop_access_desc, (vop_t *) devfs_access },
{ &vop_advlock_desc, (vop_t *) devfs_advlock },
{ &vop_bmap_desc, (vop_t *) devfs_bmap },
{ &vop_close_desc, (vop_t *) devfs_close },
{ &vop_create_desc, (vop_t *) devfs_badop },
{ &vop_fsync_desc, (vop_t *) devfs_fsync },
{ &vop_getattr_desc, (vop_t *) devfs_getattr },
{ &vop_getpages_desc, (vop_t *) devfs_getpages },
{ &vop_inactive_desc, (vop_t *) devfs_inactive },
{ &vop_ioctl_desc, (vop_t *) devfs_ioctl },
{ &vop_lease_desc, (vop_t *) vop_null },
{ &vop_link_desc, (vop_t *) devfs_badop },
{ &vop_lookup_desc, (vop_t *) devfs_lookup },
{ &vop_mkdir_desc, (vop_t *) devfs_badop },
{ &vop_mknod_desc, (vop_t *) devfs_badop },
{ &vop_open_desc, (vop_t *) devfs_open },
{ &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
{ &vop_poll_desc, (vop_t *) devfs_poll },
{ &vop_print_desc, (vop_t *) devfs_print },
{ &vop_read_desc, (vop_t *) devfs_read },
{ &vop_readdir_desc, (vop_t *) devfs_badop },
{ &vop_readlink_desc, (vop_t *) devfs_badop },
{ &vop_reallocblks_desc, (vop_t *) devfs_badop },
{ &vop_reclaim_desc, (vop_t *) devfs_reclaim },
{ &vop_remove_desc, (vop_t *) devfs_badop },
{ &vop_rename_desc, (vop_t *) devfs_badop },
{ &vop_rmdir_desc, (vop_t *) devfs_badop },
{ &vop_setattr_desc, (vop_t *) devfs_setattr },
{ &vop_strategy_desc, (vop_t *) devfs_strategy },
/* NOTE(review): symlink is not devfs_badop here, unlike create/mkdir/mknod -- confirm intended */
{ &vop_symlink_desc, (vop_t *) devfs_symlink },
{ &vop_write_desc, (vop_t *) devfs_write },
{ NULL, NULL }	/* table terminator */
};
/* ties the entry table to the devfs_spec_vnodeop_p vector and registers it */
static struct vnodeopv_desc devfs_spec_vnodeop_opv_desc =
{ &devfs_spec_vnodeop_p, devfs_spec_vnodeop_entries };
VNODEOP_SET(devfs_spec_vnodeop_opv_desc);