For reasons unknown, the nfs locking code used a fifo to send requests to

userland and a dedicated system call to get replies.

The vnode bypass for fifos broke this, resulting in a panic.

Ditch all the magic and create a device /dev/nfslock instead, and
use that for both directions.  Apart from the shorter path, this is
also faster because the device driver runs Giant-free using the
vnode bypass.

Noticed by:	marcel
This commit is contained in:
Poul-Henning Kamp 2004-12-06 08:31:32 +00:00
parent aa2ea23220
commit 8b431c9576
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=138430
5 changed files with 159 additions and 88 deletions

View File

@ -101,10 +101,6 @@
#ifdef NFS_NPROCS
#include <nfsclient/nfsstats.h>
#endif
/*
* Flags for nfsclnt() system call.
*/
#define NFSCLNT_LOCKDANS 0x200
/*
* vfs.nfs sysctl(3) identifiers

View File

@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/kernel.h> /* for hz */
#include <sys/limits.h>
@ -61,6 +62,150 @@ __FBSDID("$FreeBSD$");
#include <nfsclient/nfs_lock.h>
#include <nfsclient/nlminfo.h>
/* Malloc type used for the queued copies of lock requests. */
MALLOC_DEFINE(M_NFSLOCK, "NFS lock", "NFS lock request");
/* Forward declaration: nfslock_write() calls this before it is defined. */
static int nfslockdans(struct thread *td, struct lockd_ans *ansp);
/*
* --------------------------------------------------------------------
* A miniature device driver which the userland uses to talk to us.
*
*/
/* Device node returned by make_dev() in nfslock_modevent(). */
static struct cdev *nfslock_dev;
/*
 * Taken by the driver entry points and nfslock_send() while they touch
 * nfslock_isopen and nfslock_list.
 */
static struct mtx nfslock_mtx;
/* Non-zero while the device is open; set in open, cleared in close. */
static int nfslock_isopen;
/* Queue of lock requests waiting to be picked up by nfslock_read(). */
static TAILQ_HEAD(,__lock_msg) nfslock_list;
/*
 * Open handler for the nfslock device.
 *
 * The device admits a single opener: the first open marks the device as
 * in use and succeeds, any open attempted while it is marked in use
 * fails with EOPNOTSUPP.
 */
static int
nfslock_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
	int busy;

	mtx_lock(&nfslock_mtx);
	busy = nfslock_isopen;
	if (!busy)
		nfslock_isopen = 1;
	mtx_unlock(&nfslock_mtx);

	return (busy ? EOPNOTSUPP : 0);
}
static int
nfslock_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
struct __lock_msg *lm;
mtx_lock(&nfslock_mtx);
nfslock_isopen = 0;
while (!TAILQ_EMPTY(&nfslock_list)) {
lm = TAILQ_FIRST(&nfslock_list);
/* XXX: answer request */
TAILQ_REMOVE(&nfslock_list, lm, lm_link);
free(lm, M_NFSLOCK);
}
mtx_unlock(&nfslock_mtx);
return (0);
}
static int
nfslock_read(struct cdev *dev, struct uio *uio, int ioflag)
{
int error;
struct __lock_msg *lm;
if (uio->uio_resid != sizeof *lm)
return (EOPNOTSUPP);
lm = NULL;
error = 0;
mtx_lock(&nfslock_mtx);
while (TAILQ_EMPTY(&nfslock_list)) {
error = msleep(&nfslock_list, &nfslock_mtx, PSOCK | PCATCH,
"nfslockd", 0);
if (error)
break;
}
if (!error) {
lm = TAILQ_FIRST(&nfslock_list);
TAILQ_REMOVE(&nfslock_list, lm, lm_link);
}
mtx_unlock(&nfslock_mtx);
if (!error) {
error = uiomove(lm, sizeof *lm, uio);
free(lm, M_NFSLOCK);
}
return (error);
}
static int
nfslock_write(struct cdev *dev, struct uio *uio, int ioflag)
{
struct lockd_ans la;
int error;
if (uio->uio_resid != sizeof la)
return (EOPNOTSUPP);
error = uiomove(&la, sizeof la, uio);
if (!error)
error = nfslockdans(curthread, &la);
return (error);
}
/*
 * Queue a copy of the lock request *lm for the reader of the device and
 * wake up anybody sleeping in nfslock_read().
 *
 * Returns 0 when the request was queued, or EOPNOTSUPP when the device
 * is not currently open (nobody would ever pick the request up).
 */
static int
nfslock_send(struct __lock_msg *lm)
{
	struct __lock_msg *copy;

	/* Allocate before taking the mutex: M_WAITOK may sleep. */
	copy = malloc(sizeof *copy, M_NFSLOCK, M_WAITOK);

	mtx_lock(&nfslock_mtx);
	if (!nfslock_isopen) {
		mtx_unlock(&nfslock_mtx);
		free(copy, M_NFSLOCK);
		return (EOPNOTSUPP);
	}
	memcpy(copy, lm, sizeof *copy);
	TAILQ_INSERT_TAIL(&nfslock_list, copy, lm_link);
	wakeup(&nfslock_list);
	mtx_unlock(&nfslock_mtx);

	return (0);
}
/*
 * Character device switch for the nfslock device: wires the open, close,
 * read and write entry points defined above to the device name.
 */
static struct cdevsw nfslock_cdevsw = {
	.d_name =	"nfslock",
	.d_version =	D_VERSION,
	.d_open =	nfslock_open,
	.d_close =	nfslock_close,
	.d_read =	nfslock_read,
	.d_write =	nfslock_write,
};
/*
 * Module event handler for the nfslock device.
 *
 * On MOD_LOAD it initializes the mutex and the request queue and creates
 * the device node, then returns 0.  Every other event type is refused
 * with EOPNOTSUPP.
 */
static int
nfslock_modevent(module_t mod __unused, int type, void *data __unused)
{

	if (type != MOD_LOAD)
		return (EOPNOTSUPP);

	if (bootverbose)
		printf("nfslock: pseudo-device\n");
	mtx_init(&nfslock_mtx, "nfslock", NULL, MTX_DEF);
	TAILQ_INIT(&nfslock_list);
	nfslock_dev = make_dev(&nfslock_cdevsw, 0, UID_ROOT, GID_KMEM, 0600,
	    _PATH_NFSLCKDEV);

	return (0);
}
/* Register the nfslock module, with nfslock_modevent() as its event handler. */
DEV_MODULE(nfslock, nfslock_modevent, NULL);
/* Declare version 1 of the nfslock module. */
MODULE_VERSION(nfslock, 1);
/*
* XXX
* We have to let the process know if the call succeeded. I'm using an extra
@ -76,12 +221,11 @@ int
nfs_dolock(struct vop_advlock_args *ap)
{
LOCKD_MSG msg;
struct nameidata nd;
struct thread *td;
struct vnode *vp, *wvp;
int error, error1;
struct vnode *vp;
int error;
struct flock *fl;
int fmode, ioflg;
int ioflg;
struct proc *p;
td = curthread;
@ -132,59 +276,14 @@ nfs_dolock(struct vop_advlock_args *ap)
msg.lm_nfsv3 = NFS_ISV3(vp);
cru2x(td->td_ucred, &msg.lm_cred);
/*
* Open the lock fifo. If for any reason we don't find the fifo, it
* means that the lock daemon isn't running. Translate any missing
* file error message for the user, otherwise the application will
* complain that the user's file is missing, which isn't the case.
* Note that we use proc0's cred, so the fifo is opened as root.
*
* XXX: Note that this behavior is relative to the root directory
* of the current process, and this may result in a variety of
* {functional, security} problems in chroot() environments.
*/
NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, _PATH_LCKFIFO, td);
fmode = FFLAGS(O_WRONLY | O_NONBLOCK);
error = vn_open_cred(&nd, &fmode, 0, thread0.td_ucred, -1);
switch (error) {
case ENOENT:
case ENXIO:
/*
* Map a failure to find the fifo or no listener on the
* fifo to locking not being supported.
*/
return (EOPNOTSUPP);
case 0:
break;
default:
return (error);
}
wvp = nd.ni_vp;
VOP_UNLOCK(wvp, 0, td); /* vn_open leaves it locked */
ioflg = IO_UNIT | IO_NOMACCHECK;
for (;;) {
VOP_LEASE(wvp, td, thread0.td_ucred, LEASE_WRITE);
error = nfslock_send(&msg);
if (error)
return (error);
error = vn_rdwr(UIO_WRITE, wvp, (caddr_t)&msg, sizeof(msg), 0,
UIO_SYSSPACE, ioflg, thread0.td_ucred, NOCRED, NULL, td);
if (error && (((ioflg & IO_NDELAY) == 0) || error != EAGAIN)) {
break;
}
/*
* If we're locking a file, wait for an answer. Unlocks succeed
* immediately.
*/
/* Unlocks succeed immediately. */
if (fl->l_type == F_UNLCK)
/*
* XXX this isn't exactly correct. The client side
* needs to continue sending its unlock until
* it gets a response back.
*/
break;
return (error);
/*
* retry after 20 seconds if we haven't gotten a response yet.
@ -227,16 +326,14 @@ nfs_dolock(struct vop_advlock_args *ap)
break;
}
error1 = vn_close(wvp, FWRITE, thread0.td_ucred, td);
/* prefer any previous 'error' to our vn_close 'error1'. */
return (error != 0 ? error : error1);
return (error);
}
/*
* nfslockdans --
* NFS advisory byte-level locks answer from the lock daemon.
*/
int
static int
nfslockdans(struct thread *td, struct lockd_ans *ansp)
{
struct proc *targetp;
@ -283,3 +380,4 @@ nfslockdans(struct thread *td, struct lockd_ans *ansp)
PROC_UNLOCK(targetp);
return (0);
}

View File

@ -44,7 +44,7 @@
* and where lockd reads these requests.
*
*/
#define _PATH_LCKFIFO "/var/run/lock"
#define _PATH_NFSLCKDEV "nfslock"
/*
* This structure is used to uniquely identify the process which originated
@ -58,12 +58,13 @@ struct lockd_msg_ident {
int msg_seq; /* Sequence number of message */
};
#define LOCKD_MSG_VERSION 2
#define LOCKD_MSG_VERSION 3
/*
* The structure that the kernel hands us for each lock request.
*/
typedef struct __lock_msg {
TAILQ_ENTRY(__lock_msg) lm_link; /* internal linkage */
int lm_version; /* which version is this */
struct lockd_msg_ident lm_msg_ident; /* originator of the message */
struct flock lm_fl; /* The lock request. */
@ -88,5 +89,4 @@ struct lockd_ans {
#ifdef _KERNEL
int nfs_dolock(struct vop_advlock_args *ap);
int nfslockdans(struct thread *td, struct lockd_ans *ansp);
#endif

View File

@ -198,19 +198,6 @@ SYSINIT(nfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL);
static int nfs_defect = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, "");
int
nfsclnt(struct thread *td, struct nfsclnt_args *uap)
{
struct lockd_ans la;
int error;
if ((uap->flag & NFSCLNT_LOCKDANS) != 0) {
error = copyin(uap->argp, &la, sizeof(la));
return (error != 0 ? error : nfslockdans(td, &la));
}
return EINVAL;
}
/*
* Asynchronous I/O daemons for client nfs.
* They do read-ahead and write-behind operations on the block I/O cache.

View File

@ -96,9 +96,6 @@ int nfs_pbuf_freecnt = -1; /* start out unlimited */
struct nfs_reqq nfs_reqq;
struct nfs_bufq nfs_bufq;
static int nfs_prev_nfsclnt_sy_narg;
static sy_call_t *nfs_prev_nfsclnt_sy_call;
/*
* and the reverse mapping from generic to Version 2 procedure numbers
*/
@ -416,11 +413,6 @@ nfs_init(struct vfsconf *vfsp)
TAILQ_INIT(&nfs_reqq);
callout_init(&nfs_callout, 0);
nfs_prev_nfsclnt_sy_narg = sysent[SYS_nfsclnt].sy_narg;
sysent[SYS_nfsclnt].sy_narg = 2;
nfs_prev_nfsclnt_sy_call = sysent[SYS_nfsclnt].sy_call;
sysent[SYS_nfsclnt].sy_call = (sy_call_t *)nfsclnt;
nfs_pbuf_freecnt = nswbuf / 2 + 1;
return (0);
@ -432,8 +424,6 @@ nfs_uninit(struct vfsconf *vfsp)
int i;
callout_stop(&nfs_callout);
sysent[SYS_nfsclnt].sy_narg = nfs_prev_nfsclnt_sy_narg;
sysent[SYS_nfsclnt].sy_call = nfs_prev_nfsclnt_sy_call;
KASSERT(TAILQ_EMPTY(&nfs_reqq),
("nfs_uninit: request queue not empty"));