freebsd-skq/sys/fs/coda/coda_psdev.c

/*-
 *             Coda: an Experimental Distributed File System
 *                              Release 3.1
 *
 *           Copyright (c) 1987-1998 Carnegie Mellon University
 *                          All Rights Reserved
 *
 * Permission  to  use, copy, modify and distribute this software and its
 * documentation is hereby granted,  provided  that  both  the  copyright
 * notice  and  this  permission  notice  appear  in  all  copies  of the
 * software, derivative works or  modified  versions,  and  any  portions
 * thereof, and that both notices appear in supporting documentation, and
 * that credit is given to Carnegie Mellon University  in  all  documents
 * and publicity pertaining to direct or indirect use of this code or its
 * derivatives.
 *
 * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
 * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
 * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
 * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
 * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
 * ANY DERIVATIVE WORK.
 *
 * Carnegie  Mellon  encourages  users  of  this  software  to return any
 * improvements or extensions that  they  make,  and  to  grant  Carnegie
 * Mellon the rights to redistribute these changes without encumbrance.
 *
 * 	@(#) src/sys/coda/coda_psdev.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
 */
/*-
 * Mach Operating System
 * Copyright (c) 1989 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */

/*
 * This code was written for the Coda filesystem at Carnegie Mellon
 * University.  Contributers include David Steere, James Kistler, and
 * M. Satyanarayanan.  */

/*
 * These routines define the psuedo device for communication between Coda's
 * Venus and Minicache in Mach 2.6. They used to be in cfs_subr.c, but I
 * moved them to make it easier to port the Minicache without porting coda.
 * -- DCS 10/12/94
 */

/*
 * These routines are the device entry points for Venus.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/file.h>		/* must come after sys/malloc.h */
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/filedesc.h>

#include <fs/coda/coda.h>
#include <fs/coda/cnode.h>
#include <fs/coda/coda_io.h>
#include <fs/coda/coda_psdev.h>

/*
 * Variables to determine how Coda sleeps and whether or not it is
 * interruptible when it does sleep waiting for Venus.
 */
/* #define	CTL_C */

#ifdef CTL_C
#include <sys/signalvar.h>
#endif

int coda_psdev_print_entry = 0;
static int outstanding_upcalls = 0;
int coda_call_sleep = PZERO - 1;
#ifdef CTL_C
int coda_pcatch = PCATCH;
#else
#endif

#define	ENTRY do {							\
	if (coda_psdev_print_entry)					\
		myprintf(("Entered %s\n", __func__));			\
} while (0)

struct vmsg {
	TAILQ_ENTRY(vmsg)	vm_chain;
	caddr_t		vm_data;
	u_short		vm_flags;
	u_short		vm_inSize;	/* Size is at most 5000 bytes */
	u_short		vm_outSize;
	u_short		vm_opcode;	/* Copied from data to save ptr deref */
	int		vm_unique;
	caddr_t		vm_sleep;	/* Not used by Mach. */
};

#define	VM_READ		1
#define	VM_WRITE	2
#define	VM_INTR		4		/* Unused. */

int
vc_open(struct cdev *dev, int flag, int mode, struct thread *td)
{
	struct vcomm *vcp;
	struct coda_mntinfo *mnt;

	ENTRY;
	mnt = dev2coda_mntinfo(dev);
	KASSERT(mnt, ("Coda: tried to open uninitialized cfs device"));
	vcp = &mnt->mi_vcomm;
	if (VC_OPEN(vcp))
		return (EBUSY);
	bzero(&(vcp->vc_selproc), sizeof (struct selinfo));
	TAILQ_INIT(&vcp->vc_requests);
	TAILQ_INIT(&vcp->vc_replies);
	MARK_VC_OPEN(vcp);
	mnt->mi_vfsp = NULL;
	mnt->mi_rootvp = NULL;
	return (0);
}

int
vc_close(struct cdev *dev, int flag, int mode, struct thread *td)
{
	struct vcomm *vcp;
	struct vmsg *vmp, *nvmp = NULL;
	struct coda_mntinfo *mi;
	int err;

	ENTRY;
	mi = dev2coda_mntinfo(dev);
	KASSERT(mi, ("Coda: closing unknown cfs device"));
	vcp = &mi->mi_vcomm;
	KASSERT(VC_OPEN(vcp), ("Coda: closing unopened cfs device"));

	/*
	 * Prevent future operations on this vfs from succeeding by
	 * auto-unmounting any vfs mounted via this device.  This frees user
	 * or sysadm from having to remember where all mount points are
	 * located.  Put this before WAKEUPs to avoid queuing new messages
	 * between the WAKEUP and the unmount (which can happen if we're
	 * unlucky).
	 */
	if (mi->mi_rootvp == NULL) {
		/*
		 * Just a simple open/close with no mount.
		 */
		MARK_VC_CLOSED(vcp);
		return (0);
	}

	/*
	 * Let unmount know this is for real.
	 */
	VTOC(mi->mi_rootvp)->c_flags |= C_UNMOUNTING;
	coda_unmounting(mi->mi_vfsp);

	/*
	 * Wakeup clients so they can return.
	 */
	outstanding_upcalls = 0;
	TAILQ_FOREACH_SAFE(vmp, &vcp->vc_requests, vm_chain, nvmp) {
		/*
		 * Free signal request messages and don't wakeup cause no one
		 * is waiting.
		 */
		if (vmp->vm_opcode == CODA_SIGNAL) {
			CODA_FREE((caddr_t)vmp->vm_data,
			    (u_int)VC_IN_NO_DATA);
			CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg));
			continue;
		}
		outstanding_upcalls++;
		wakeup(&vmp->vm_sleep);
	}
	TAILQ_FOREACH(vmp, &vcp->vc_replies, vm_chain) {
		outstanding_upcalls++;
		wakeup(&vmp->vm_sleep);
	}
	MARK_VC_CLOSED(vcp);
	if (outstanding_upcalls) {
#ifdef CODA_VERBOSE
		printf("presleep: outstanding_upcalls = %d\n",
		    outstanding_upcalls);
#endif
    		(void) tsleep(&outstanding_upcalls, coda_call_sleep,
		    "coda_umount", 0);
#ifdef CODA_VERBOSE
		printf("postsleep: outstanding_upcalls = %d\n",
		    outstanding_upcalls);
#endif
	}
	err = dounmount(mi->mi_vfsp, flag, td);
	if (err)
		myprintf(("Error %d unmounting vfs in vcclose(%s)\n", err,
		    devtoname(dev)));
	return (0);
}

int
vc_read(struct cdev *dev, struct uio *uiop, int flag)
{
	struct vcomm *vcp;
	struct vmsg *vmp;
	int error = 0;

	ENTRY;
	vcp = &dev2coda_mntinfo(dev)->mi_vcomm;

	/*
	 * Get message at head of request queue.
	 */
	vmp = TAILQ_FIRST(&vcp->vc_requests);
	if (vmp == NULL)
		return (0);	/* Nothing to read */

	/*
	 * Move the input args into userspace.
	 *
	 * XXXRW: This is not safe in the presence of >1 reader, as vmp is
	 * still on the head of the list.
	 */
	uiop->uio_rw = UIO_READ;
	error = uiomove(vmp->vm_data, vmp->vm_inSize, uiop);
	if (error) {
		myprintf(("vcread: error (%d) on uiomove\n", error));
		error = EINVAL;
	}
	TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);

	/*
	 * If request was a signal, free up the message and don't enqueue it
	 * in the reply queue.
	 */
	if (vmp->vm_opcode == CODA_SIGNAL) {
		if (codadebug)
			myprintf(("vcread: signal msg (%d, %d)\n",
			    vmp->vm_opcode, vmp->vm_unique));
		CODA_FREE((caddr_t)vmp->vm_data, (u_int)VC_IN_NO_DATA);
		CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg));
		return (error);
	}
	vmp->vm_flags |= VM_READ;
	TAILQ_INSERT_TAIL(&vcp->vc_replies, vmp, vm_chain);
	return (error);
}

int
vc_write(struct cdev *dev, struct uio *uiop, int flag)
{
	struct vcomm *vcp;
	struct vmsg *vmp;
	struct coda_out_hdr *out;
	u_long seq;
	u_long opcode;
	int buf[2];
	int error = 0;

	ENTRY;
	vcp = &dev2coda_mntinfo(dev)->mi_vcomm;

	/*
	 * Peek at the opcode, unique without transfering the data.
	 */
	uiop->uio_rw = UIO_WRITE;
	error = uiomove((caddr_t)buf, sizeof(int) * 2, uiop);
	if (error) {
		myprintf(("vcwrite: error (%d) on uiomove\n", error));
		return (EINVAL);
	}
	opcode = buf[0];
	seq = buf[1];
	if (codadebug)
		myprintf(("vcwrite got a call for %ld.%ld\n", opcode, seq));
	if (DOWNCALL(opcode)) {
		union outputArgs pbuf;

		/*
		 * Get the rest of the data.
		 */
		uiop->uio_rw = UIO_WRITE;
		error = uiomove((caddr_t)&pbuf.coda_purgeuser.oh.result,
		    sizeof(pbuf) - (sizeof(int)*2), uiop);
		if (error) {
			myprintf(("vcwrite: error (%d) on uiomove (Op %ld "
			    "seq %ld)\n", error, opcode, seq));
			return (EINVAL);
		}
		return (handleDownCall(dev2coda_mntinfo(dev), opcode, &pbuf));
	}

	/*
	 * Look for the message on the (waiting for) reply queue.
	 */
	TAILQ_FOREACH(vmp, &vcp->vc_replies, vm_chain) {
		if (vmp->vm_unique == seq)
			break;
	}
	if (vmp == NULL) {
		if (codadebug)
			myprintf(("vcwrite: msg (%ld, %ld) not found\n",
			    opcode, seq));
		return (ESRCH);
	}

	/*
	 * Remove the message from the reply queue.
	 */
	TAILQ_REMOVE(&vcp->vc_replies, vmp, vm_chain);

	/*
	 * Move data into response buffer.
	 */
	out = (struct coda_out_hdr *)vmp->vm_data;

	/*
	 * Don't need to copy opcode and uniquifier.
	 *
	 * Get the rest of the data.
	 */
	if (vmp->vm_outSize < uiop->uio_resid) {
		myprintf(("vcwrite: more data than asked for (%d < %zd)\n",
		    vmp->vm_outSize, uiop->uio_resid));

		/*
		 * Notify caller of the error.
		 */
		wakeup(&vmp->vm_sleep);
		return (EINVAL);
	}

	/*
	 * Save the value.
	 */
	buf[0] = uiop->uio_resid;
	uiop->uio_rw = UIO_WRITE;
	error = uiomove((caddr_t) &out->result, vmp->vm_outSize -
	    (sizeof(int) * 2), uiop);
	if (error) {
		myprintf(("vcwrite: error (%d) on uiomove (op %ld seq %ld)\n",
		    error, opcode, seq));
		return (EINVAL);
	}

	/*
	 * I don't think these are used, but just in case.
	 *
	 * XXX - aren't these two already correct? -bnoble
	 */
	out->opcode = opcode;
	out->unique = seq;
	vmp->vm_outSize	= buf[0];	/* Amount of data transferred? */
	vmp->vm_flags |= VM_WRITE;
	error = 0;
	if (opcode == CODA_OPEN_BY_FD) {
		struct coda_open_by_fd_out *tmp =
		    (struct coda_open_by_fd_out *)out;
		struct file *fp;
		struct vnode *vp = NULL;

		if (tmp->oh.result == 0) {
			error = getvnode(uiop->uio_td->td_proc->p_fd,
			    tmp->fd, &fp);
			if (!error) {
				/*
				 * XXX: Since the whole driver runs with
				 * Giant, don't actually need to acquire it
				 * explicitly here yet.
				 */
				mtx_lock(&Giant);
				vp = fp->f_vnode;
				VREF(vp);
				fdrop(fp, uiop->uio_td);
				mtx_unlock(&Giant);
			}
		}
		tmp->vp = vp;
	}
	wakeup(&vmp->vm_sleep);
	return (error);
}

int
vc_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
    struct thread *t)
{

	ENTRY;
	switch(cmd) {
	case CODARESIZE:
		return (ENODEV);

	case CODASTATS:
		return (ENODEV);

	case CODAPRINT:
		return (ENODEV);

	case CIOC_KERNEL_VERSION:
		switch (*(u_int *)addr) {
		case 0:
			*(u_int *)addr = coda_kernel_version;
			return (0);

		case 1:
		case 2:
			if (coda_kernel_version != *(u_int *)addr)
				return (ENOENT);
			else
			    	return (0);

		default:
			return (ENOENT);
		}

	default:
		return (EINVAL);
	}
}

int
vc_poll(struct cdev *dev, int events, struct thread *td)
{
	struct vcomm *vcp;
	int event_msk = 0;

	ENTRY;
	vcp = &dev2coda_mntinfo(dev)->mi_vcomm;
	event_msk = events & (POLLIN|POLLRDNORM);
	if (!event_msk)
		return (0);
	if (!TAILQ_EMPTY(&vcp->vc_requests))
		return (events & (POLLIN|POLLRDNORM));
	selrecord(td, &(vcp->vc_selproc));
	return (0);
}

/*
 * Statistics.
 */
struct coda_clstat coda_clstat;

/*
 * Key question: whether to sleep interuptably or uninteruptably when waiting
 * for Venus.  The former seems better (cause you can ^C a job), but then
 * GNU-EMACS completion breaks.  Use tsleep with no timeout, and no longjmp
 * happens.  But, when sleeping "uninterruptibly", we don't get told if it
 * returns abnormally (e.g. kill -9).
 */
int
coda_call(struct coda_mntinfo *mntinfo, int inSize, int *outSize,
    caddr_t buffer)
{
	struct vcomm *vcp;
	struct vmsg *vmp;
	int error;
#ifdef CTL_C
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	sigset_t psig_omask;
	sigset_t tempset;
	int i;
#endif

	/*
	 * Unlikely, but could be a race condition with a dying warden.
	 */
	if (mntinfo == NULL)
		return ENODEV;
	vcp = &(mntinfo->mi_vcomm);
	coda_clstat.ncalls++;
	coda_clstat.reqs[((struct coda_in_hdr *)buffer)->opcode]++;
	if (!VC_OPEN(vcp))
		return (ENODEV);
	CODA_ALLOC(vmp,struct vmsg *,sizeof(struct vmsg));

	/*
	 * Format the request message.
	 */
	vmp->vm_data = buffer;
	vmp->vm_flags = 0;
	vmp->vm_inSize = inSize;
	vmp->vm_outSize
	    = *outSize ? *outSize : inSize; /* |buffer| >= inSize */
	vmp->vm_opcode = ((struct coda_in_hdr *)buffer)->opcode;
	vmp->vm_unique = ++vcp->vc_seq;
	if (codadebug)
		myprintf(("Doing a call for %d.%d\n", vmp->vm_opcode,
		    vmp->vm_unique));

	/*
	 * Fill in the common input args.
	 */
	((struct coda_in_hdr *)buffer)->unique = vmp->vm_unique;

	/*
	 * Append msg to request queue and poke Venus.
	 */
	TAILQ_INSERT_TAIL(&vcp->vc_requests, vmp, vm_chain);
	selwakeuppri(&(vcp->vc_selproc), coda_call_sleep);

	/*
	 * We can be interrupted while we wait for Venus to process our
	 * request.  If the interrupt occurs before Venus has read the
	 * request, we dequeue and return. If it occurs after the read but
	 * before the reply, we dequeue, send a signal message, and return.
	 * If it occurs after the reply we ignore it.  In no case do we want
	 * to restart the syscall.  If it was interrupted by a venus shutdown
	 * (vcclose), return ENODEV.
	 *
	 * Ignore return, we have to check anyway.
	 */
#ifdef CTL_C
	/*
	 * This is work in progress.  Setting coda_pcatch lets tsleep
	 * reawaken on a ^c or ^z.  The problem is that emacs sets certain
	 * interrupts as SA_RESTART.  This means that we should exit sleep
	 * handle the "signal" and then go to sleep again.  Mostly this is
	 * done by letting the syscall complete and be restarted.  We are not
	 * idempotent and can not do this.  A better solution is necessary.
	 */
	i = 0;
	PROC_LOCK(p);
	psig_omask = td->td_sigmask;
	do {
		error = msleep(&vmp->vm_sleep, &p->p_mtx,
		    (coda_call_sleep|coda_pcatch), "coda_call", hz*2);
		if (error == 0)
			break;
		else if (error == EWOULDBLOCK) {
#ifdef CODA_VERBOSE
			printf("coda_call: tsleep TIMEOUT %d sec\n", 2+2*i);
#endif
		}
		else {
			SIGEMPTYSET(tempset);
			SIGADDSET(tempset, SIGIO);
			if (SIGSETEQ(td->td_siglist, tempset)) {
				SIGADDSET(td->td_sigmask, SIGIO);
#ifdef CODA_VERBOSE
				printf("coda_call: tsleep returns %d SIGIO, "
				    "cnt %d\n", error, i);
#endif
			} else {
				SIGDELSET(tempset, SIGIO);
				SIGADDSET(tempset, SIGALRM);
				if (SIGSETEQ(td->td_siglist, tempset)) {
					SIGADDSET(td->td_sigmask, SIGALRM);
#ifdef CODA_VERBOSE
					printf("coda_call: tsleep returns "
					    "%d SIGALRM, cnt %d\n", error, i);
#endif
				} else {
#ifdef CODA_VERBOSE
					printf("coda_call: tsleep returns "
					    "%d, cnt %d\n", error, i);
#endif

#ifdef notyet
					tempset = td->td_siglist;
					SIGSETNAND(tempset, td->td_sigmask);
					printf("coda_call: siglist = %p, "
					    "sigmask = %p, mask %p\n",
					    td->td_siglist, td->td_sigmask,
					    tempset);
					break;
					SIGSETOR(td->td_sigmask, td->td_siglist);
					tempset = td->td_siglist;
					SIGSETNAND(tempset, td->td_sigmask);
					printf("coda_call: new mask, "
					    "siglist = %p, sigmask = %p, "
					    "mask %p\n", td->td_siglist,
					    td->td_sigmask, tempset);
#endif
				}
			}
		}
	} while (error && i++ < 128 && VC_OPEN(vcp));
	td->td_sigmask = psig_omask;
	signotify(td);
	PROC_UNLOCK(p);
#else
	(void)tsleep(&vmp->vm_sleep, coda_call_sleep, "coda_call", 0);
#endif
	if (VC_OPEN(vcp)) {
		/*
		 * Venus is still alive.
		 *
		 * Op went through, interrupt or not...
		 */
		if (vmp->vm_flags & VM_WRITE) {
			error = 0;
			*outSize = vmp->vm_outSize;
		} else if (!(vmp->vm_flags & VM_READ)) {
			/* Interrupted before venus read it. */
#ifdef CODA_VERBOSE
			if (1)
#else
			if (codadebug)
#endif
				myprintf(("interrupted before read: op = "
				    "%d.%d, flags = %x\n", vmp->vm_opcode,
				    vmp->vm_unique, vmp->vm_flags));
			TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);
			error = EINTR;
		} else {
			/*
			 * (!(vmp->vm_flags & VM_WRITE)) means interrupted
			 * after upcall started.
			 *
			 * Interrupted after start of upcall, send venus a
			 * signal.
			 */
			struct coda_in_hdr *dog;
			struct vmsg *svmp;

#ifdef CODA_VERBOSE
			if (1)
#else
			if (codadebug)
#endif
				myprintf(("Sending Venus a signal: op = "
				    "%d.%d, flags = %x\n", vmp->vm_opcode,
				    vmp->vm_unique, vmp->vm_flags));
			TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);
			error = EINTR;
			CODA_ALLOC(svmp, struct vmsg *, sizeof(struct vmsg));
			CODA_ALLOC((svmp->vm_data), char *,
			    sizeof(struct coda_in_hdr));
			dog = (struct coda_in_hdr *)svmp->vm_data;
			svmp->vm_flags = 0;
			dog->opcode = svmp->vm_opcode = CODA_SIGNAL;
			dog->unique = svmp->vm_unique = vmp->vm_unique;
			svmp->vm_inSize = sizeof (struct coda_in_hdr);
/*??? rvb */		svmp->vm_outSize = sizeof (struct coda_in_hdr);
			if (codadebug)
				myprintf(("coda_call: enqueing signal msg "
				    "(%d, %d)\n", svmp->vm_opcode,
				    svmp->vm_unique));

			/*
			 * Insert at head of queue!
			 *
			 * XXXRW: Actually, the tail.
			 */
			TAILQ_INSERT_TAIL(&vcp->vc_requests, svmp, vm_chain);
			selwakeuppri(&(vcp->vc_selproc), coda_call_sleep);
		}
	} else {
		/* If venus died (!VC_OPEN(vcp)) */
		if (codadebug)
			myprintf(("vcclose woke op %d.%d flags %d\n",
			    vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));
		error = ENODEV;
	}
	CODA_FREE(vmp, sizeof(struct vmsg));
	if (outstanding_upcalls > 0 && (--outstanding_upcalls == 0))
		wakeup(&outstanding_upcalls);
	if (!error)
		error = ((struct coda_out_hdr *)buffer)->result;
	return (error);
}