Changes to make the NFS client MP safe.

Thanks to Kris Kennaway for testing and sending lots of bugs my way.
Mohan Srinivasan 2006-05-19 00:04:24 +00:00
parent 067eb35dd0
commit f1cdf89911
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=158739
11 changed files with 923 additions and 450 deletions
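
The shape of the change is easiest to read off the mtx_init() calls scattered through the diff. A summary sketch of the locks this commit introduces or leans on (names and granularity as they appear in the code below):

/*
 * Locking added by this change, coarsest to finest:
 *
 *   nfs_reqq_mtx  global               "NFS reqq lock"  - outstanding
 *                                      request queue and xid generation
 *   nfs_iod_mtx   global               "NFS iod lock"   - nfsiod daemon
 *                                      bookkeeping (replaces Giant there)
 *   nm_mtx        per struct nfsmount  "NFS*mount lock" - mount flags and
 *                                      state, congestion window, TCP
 *                                      reconnect state
 *   n_mtx         per struct nfsnode   "NFSnode lock"   - n_flag, n_size,
 *                                      attribute cache, directory cookies
 *   r_mtx         per struct nfsreq    "NFSrep lock"    - request flags,
 *                                      reply posting, resend marking
 */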

View File

@ -563,6 +563,8 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
vfs_getnewfsid(mp);
nmp->nm_mountp = mp;
mtx_init(&nmp->nm_mtx, "NFS4mount lock", NULL, MTX_DEF);
nmp->nm_maxfilesize = 0xffffffffLL;
nmp->nm_timeo = NFS_TIMEO;
nmp->nm_retry = NFS_RETRANS;
@ -652,6 +654,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
if (mrep != NULL)
m_freem(mrep);
bad:
mtx_destroy(&nmp->nm_mtx);
nfs4_disconnect(nmp);
uma_zfree(nfsmount_zone, nmp);
FREE(nam, M_SONAME);
@ -698,6 +701,7 @@ nfs_unmount(struct mount *mp, int mntflags, struct thread *td)
/* XXX there's a race condition here for SMP */
wakeup(&nfs4_daemonproc);
mtx_destroy(&nmp->nm_mtx);
uma_zfree(nfsmount_zone, nmp);
return (0);
}
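
nm_mtx follows the standard lifecycle: mtx_init() in mountnfs(), mtx_destroy() on every path that frees the nfsmount -- the "bad:" error path as well as nfs_unmount(). A minimal free-standing sketch of that pattern, with a hypothetical struct obj and obj_setup() standing in for the mount pieces:

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/lock.h>
#include <sys/mutex.h>

struct obj {
	struct mtx o_mtx;
	/* ... fields that o_mtx protects ... */
};

static int obj_setup(struct obj *);	/* hypothetical setup step */

static int
obj_create(struct obj *o)
{
	mtx_init(&o->o_mtx, "obj lock", NULL, MTX_DEF);
	if (obj_setup(o) != 0) {
		mtx_destroy(&o->o_mtx);	/* error path mirrors "bad:" */
		return (EIO);
	}
	return (0);
}

static void
obj_destroy(struct obj *o)
{
	mtx_destroy(&o->o_mtx);		/* teardown mirrors nfs_unmount() */
}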

View File

@ -131,6 +131,7 @@ extern struct uma_zone *nfsmount_zone;
extern struct callout nfs_callout;
extern struct nfsstats nfsstats;
extern struct mtx nfs_iod_mtx;
extern int nfs_numasync;
extern unsigned int nfs_iodmax;
@ -178,6 +179,7 @@ struct nfsreq {
int r_rtt; /* RTT for rpc */
int r_lastmsg; /* last tprintf */
struct thread *r_td; /* Proc that did I/O system call */
struct mtx r_mtx; /* Protects nfsreq fields */
};
/*
@ -310,8 +312,6 @@ int nfs_meta_setsize (struct vnode *, struct ucred *,
void nfs_set_sigmask __P((struct thread *td, sigset_t *oldset));
void nfs_restore_sigmask __P((struct thread *td, sigset_t *set));
int nfs_tsleep __P((struct thread *td, void *ident, int priority, char *wmesg,
int timo));
int nfs_msleep __P((struct thread *td, void *ident, struct mtx *mtx, int priority,
char *wmesg, int timo));
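
The nfs_msleep() prototype added here replaces nfs_tsleep(). Judging from the nfs_tsleep() body this diff removes from nfs_socket.c, a plausible reconstruction (not necessarily the exact committed body) is the same sigmask-swapping wrapper, but built on msleep() so the interlock mutex is dropped atomically with going to sleep:

int
nfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority,
    char *wmesg, int timo)
{
	sigset_t oldset;
	int error;

	if ((priority & PCATCH) == 0)
		return (msleep(ident, mtx, priority, wmesg, timo));
	if (td == NULL)
		td = curthread; /* XXX */
	nfs_set_sigmask(td, &oldset);	/* allow only interrupting signals */
	error = msleep(ident, mtx, priority, wmesg, timo);
	nfs_restore_sigmask(td, &oldset);	/* put the old mask back */
	return (error);
}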

View File

@ -71,6 +71,7 @@ static int nfs_directio_write(struct vnode *vp, struct uio *uiop,
extern int nfs_directio_enable;
extern int nfs_directio_allow_mmap;
/*
* Vnode op for VM getpages.
*/
@ -90,8 +91,6 @@ nfs_getpages(struct vop_getpages_args *ap)
vm_page_t *pages;
struct nfsnode *np;
GIANT_REQUIRED;
vp = ap->a_vp;
np = VTONFS(vp);
td = curthread; /* XXX */
@ -101,22 +100,28 @@ nfs_getpages(struct vop_getpages_args *ap)
count = ap->a_count;
if ((object = vp->v_object) == NULL) {
printf("nfs_getpages: called with non-merged cache vnode??\n");
nfs_printf("nfs_getpages: called with non-merged cache vnode??\n");
return VM_PAGER_ERROR;
}
if (nfs_directio_enable && !nfs_directio_allow_mmap &&
(np->n_flag & NNONCACHE) &&
(vp->v_type == VREG)) {
printf("nfs_getpages: called on non-cacheable vnode??\n");
return VM_PAGER_ERROR;
if (nfs_directio_enable && !nfs_directio_allow_mmap) {
mtx_lock(&np->n_mtx);
if ((np->n_flag & NNONCACHE) && (vp->v_type == VREG)) {
mtx_unlock(&np->n_mtx);
nfs_printf("nfs_getpages: called on non-cacheable vnode??\n");
return VM_PAGER_ERROR;
} else
mtx_unlock(&np->n_mtx);
}
mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
(nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
(nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
mtx_unlock(&nmp->nm_mtx);
/* We'll never get here for v4, because we always have fsinfo */
(void)nfs_fsinfo(nmp, vp, cred, td);
}
} else
mtx_unlock(&nmp->nm_mtx);
npages = btoc(count);
@ -173,7 +178,7 @@ nfs_getpages(struct vop_getpages_args *ap)
relpbuf(bp, &nfs_pbuf_freecnt);
if (error && (uio.uio_resid == count)) {
printf("nfs_getpages: error %d\n", error);
nfs_printf("nfs_getpages: error %d\n", error);
VM_OBJECT_LOCK(object);
vm_page_lock_queues();
for (i = 0; i < npages; ++i) {
@ -270,8 +275,6 @@ nfs_putpages(struct vop_putpages_args *ap)
struct nfsnode *np;
vm_page_t *pages;
GIANT_REQUIRED;
vp = ap->a_vp;
np = VTONFS(vp);
td = curthread; /* XXX */
@ -282,15 +285,22 @@ nfs_putpages(struct vop_putpages_args *ap)
rtvals = ap->a_rtvals;
npages = btoc(count);
offset = IDX_TO_OFF(pages[0]->pindex);
mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
(nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
mtx_unlock(&nmp->nm_mtx);
(void)nfs_fsinfo(nmp, vp, cred, td);
}
} else
mtx_unlock(&nmp->nm_mtx);
mtx_lock(&np->n_mtx);
if (nfs_directio_enable && !nfs_directio_allow_mmap &&
(np->n_flag & NNONCACHE) && (vp->v_type == VREG))
printf("nfs_putpages: called on noncache-able vnode??\n");
(np->n_flag & NNONCACHE) && (vp->v_type == VREG)) {
mtx_unlock(&np->n_mtx);
nfs_printf("nfs_putpages: called on noncache-able vnode??\n");
mtx_lock(&np->n_mtx);
}
for (i = 0; i < npages; i++)
rtvals[i] = VM_PAGER_AGAIN;
@ -298,12 +308,12 @@ nfs_putpages(struct vop_putpages_args *ap)
/*
* When putting pages, do not extend file past EOF.
*/
if (offset + count > np->n_size) {
count = np->n_size - offset;
if (count < 0)
count = 0;
}
mtx_unlock(&np->n_mtx);
/*
* We use only the kva address for the buffer, but this is extremely
@ -349,6 +359,81 @@ nfs_putpages(struct vop_putpages_args *ap)
return rtvals[0];
}
/*
* For nfs, cache consistency can only be maintained approximately.
* Although RFC1094 does not specify the criteria, the following is
* believed to be compatible with the reference port.
* For nfs:
* If the file's modify time on the server has changed since the
* last read rpc or you have written to the file,
* you may have lost data cache consistency with the
* server, so flush all of the file's data out of the cache.
* Then force a getattr rpc to ensure that you have up to date
* attributes.
* NB: This implies that cache data can be read when up to
* NFS_ATTRTIMEO seconds out of date. If you find that you need current
* attributes this could be forced by setting n_attrstamp to 0 before
* the VOP_GETATTR() call.
*/
static inline int
nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
{
int error = 0;
struct vattr vattr;
struct nfsnode *np = VTONFS(vp);
int old_lock;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
/*
* Grab the exclusive lock before checking whether the cache is
* consistent.
* XXX - We can make this cheaper later (by acquiring cheaper locks).
* But for now, this suffices.
*/
old_lock = nfs_upgrade_vnlock(vp, td);
mtx_lock(&np->n_mtx);
if (np->n_flag & NMODIFIED) {
mtx_unlock(&np->n_mtx);
if (vp->v_type != VREG) {
if (vp->v_type != VDIR)
panic("nfs: bioread, not dir");
(nmp->nm_rpcops->nr_invaldir)(vp);
error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
if (error)
goto out;
}
np->n_attrstamp = 0;
error = VOP_GETATTR(vp, &vattr, cred, td);
if (error)
goto out;
mtx_lock(&np->n_mtx);
np->n_mtime = vattr.va_mtime;
mtx_unlock(&np->n_mtx);
} else {
mtx_unlock(&np->n_mtx);
error = VOP_GETATTR(vp, &vattr, cred, td);
if (error)
return (error);
mtx_lock(&np->n_mtx);
if ((np->n_flag & NSIZECHANGED)
|| (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime))) {
mtx_unlock(&np->n_mtx);
if (vp->v_type == VDIR)
(nmp->nm_rpcops->nr_invaldir)(vp);
error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
if (error)
goto out;
mtx_lock(&np->n_mtx);
np->n_mtime = vattr.va_mtime;
np->n_flag &= ~NSIZECHANGED;
}
mtx_unlock(&np->n_mtx);
}
out:
nfs_downgrade_vnlock(vp, td, old_lock);
return error;
}
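
nfs_bioread_check_cons() above is a good illustration of the drop-and-revalidate discipline used throughout this change: n_mtx may not be held across anything that can sleep (an RPC, nfs_vinvalbuf()), so flags are sampled under the lock, the lock is dropped around the call, and fields are re-read afterwards; the exclusive vnode lock taken up front is what keeps the whole check atomic. The skeleton of the idiom, assuming the nfsclient headers and a hypothetical do_sleeping_rpc():

static int do_sleeping_rpc(struct vnode *);	/* hypothetical stand-in */

static int
check_and_flush(struct vnode *vp)
{
	struct nfsnode *np = VTONFS(vp);
	int error = 0;

	mtx_lock(&np->n_mtx);
	if (np->n_flag & NMODIFIED) {
		mtx_unlock(&np->n_mtx);		/* drop before sleeping */
		error = do_sleeping_rpc(vp);
		mtx_lock(&np->n_mtx);	/* re-acquire; state may have changed */
	}
	mtx_unlock(&np->n_mtx);
	return (error);
}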
/*
* Vnode op for read using bio
*/
@ -358,7 +443,6 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
struct nfsnode *np = VTONFS(vp);
int biosize, i;
struct buf *bp, *rabp;
struct vattr vattr;
struct thread *td;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
daddr_t lbn, rabn;
@ -376,9 +460,14 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
return (EINVAL);
td = uio->uio_td;
mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
(nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
(nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
mtx_unlock(&nmp->nm_mtx);
(void)nfs_fsinfo(nmp, vp, cred, td);
} else
mtx_unlock(&nmp->nm_mtx);
if (vp->v_type != VDIR &&
(uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
return (EFBIG);
@ -389,52 +478,18 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
biosize = vp->v_mount->mnt_stat.f_iosize;
seqcount = (int)((off_t)(ioflag >> IO_SEQSHIFT) * biosize / BKVASIZE);
/*
* For nfs, cache consistency can only be maintained approximately.
* Although RFC1094 does not specify the criteria, the following is
* believed to be compatible with the reference port.
* For nfs:
* If the file's modify time on the server has changed since the
* last read rpc or you have written to the file,
* you may have lost data cache consistency with the
* server, so flush all of the file's data out of the cache.
* Then force a getattr rpc to ensure that you have up to date
* attributes.
* NB: This implies that cache data can be read when up to
* NFS_ATTRTIMEO seconds out of date. If you find that you need current
* attributes this could be forced by setting n_attrstamp to 0 before
* the VOP_GETATTR() call.
*/
if (np->n_flag & NMODIFIED) {
if (vp->v_type != VREG) {
if (vp->v_type != VDIR)
panic("nfs: bioread, not dir");
(nmp->nm_rpcops->nr_invaldir)(vp);
error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
if (error)
return (error);
}
np->n_attrstamp = 0;
error = VOP_GETATTR(vp, &vattr, cred, td);
if (error)
return (error);
np->n_mtime = vattr.va_mtime;
} else {
error = VOP_GETATTR(vp, &vattr, cred, td);
if (error)
return (error);
if ((np->n_flag & NSIZECHANGED)
|| (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime))) {
if (vp->v_type == VDIR)
(nmp->nm_rpcops->nr_invaldir)(vp);
error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
if (error)
return (error);
np->n_mtime = vattr.va_mtime;
np->n_flag &= ~NSIZECHANGED;
}
}
error = nfs_bioread_check_cons(vp, td, cred);
if (error)
return error;
do {
u_quad_t nsize;
mtx_lock(&np->n_mtx);
nsize = np->n_size;
mtx_unlock(&np->n_mtx);
switch (vp->v_type) {
case VREG:
nfsstats.biocache_reads++;
@ -443,12 +498,10 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
/*
* Start the read ahead(s), as required.
* The readahead is kicked off only if sequential access
* is detected, based on the readahead hint (ra_expect_lbn).
*/
if (nmp->nm_readahead > 0 && np->ra_expect_lbn == lbn) {
if (nmp->nm_readahead > 0) {
for (nra = 0; nra < nmp->nm_readahead && nra < seqcount &&
(off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
(off_t)(lbn + 1 + nra) * biosize < nsize; nra++) {
rabn = lbn + 1 + nra;
if (incore(&vp->v_bufobj, rabn) == NULL) {
rabp = nfs_getcacheblk(vp, rabn, biosize, td);
@ -472,15 +525,14 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
}
}
}
np->ra_expect_lbn = lbn + 1;
}
/* Note that bcount is *not* DEV_BSIZE aligned. */
bcount = biosize;
if ((off_t)lbn * biosize >= np->n_size) {
if ((off_t)lbn * biosize >= nsize) {
bcount = 0;
} else if ((off_t)(lbn + 1) * biosize > np->n_size) {
bcount = np->n_size - (off_t)lbn * biosize;
} else if ((off_t)(lbn + 1) * biosize > nsize) {
bcount = nsize - (off_t)lbn * biosize;
}
bp = nfs_getcacheblk(vp, lbn, bcount, td);
@ -652,7 +704,7 @@ nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
n = np->n_direofoffset - uio->uio_offset;
break;
default:
printf(" nfs_bioread: type %x unexpected\n", vp->v_type);
nfs_printf(" nfs_bioread: type %x unexpected\n", vp->v_type);
bp = NULL;
break;
};
@ -690,14 +742,18 @@ nfs_directio_write(vp, uiop, cred, ioflag)
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
struct thread *td = uiop->uio_td;
int size;
int wsize;
mtx_lock(&nmp->nm_mtx);
wsize = nmp->nm_wsize;
mtx_unlock(&nmp->nm_mtx);
if (ioflag & IO_SYNC) {
int iomode, must_commit;
struct uio uio;
struct iovec iov;
do_sync:
while (uiop->uio_resid > 0) {
size = min(uiop->uio_resid, nmp->nm_wsize);
size = min(uiop->uio_resid, wsize);
size = min(uiop->uio_iov->iov_len, size);
iov.iov_base = uiop->uio_iov->iov_base;
iov.iov_len = size;
@ -746,7 +802,7 @@ nfs_directio_write(vp, uiop, cred, ioflag)
* in NFS directio access.
*/
while (uiop->uio_resid > 0) {
size = min(uiop->uio_resid, nmp->nm_wsize);
size = min(uiop->uio_resid, wsize);
size = min(uiop->uio_iov->iov_len, size);
bp = getpbuf(&nfs_pbuf_freecnt);
t_uio = malloc(sizeof(struct uio), M_NFSDIRECTIO, M_WAITOK);
@ -819,8 +875,6 @@ nfs_write(struct vop_write_args *ap)
int n, on, error = 0;
struct proc *p = td?td->td_proc:NULL;
GIANT_REQUIRED;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_WRITE)
panic("nfs_write mode");
@ -829,20 +883,29 @@ nfs_write(struct vop_write_args *ap)
#endif
if (vp->v_type != VREG)
return (EIO);
mtx_lock(&np->n_mtx);
if (np->n_flag & NWRITEERR) {
np->n_flag &= ~NWRITEERR;
mtx_unlock(&np->n_mtx);
return (np->n_error);
}
} else
mtx_unlock(&np->n_mtx);
mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
(nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
(nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
mtx_unlock(&nmp->nm_mtx);
(void)nfs_fsinfo(nmp, vp, cred, td);
} else
mtx_unlock(&nmp->nm_mtx);
/*
* Synchronously flush pending buffers if we are in synchronous
* mode or if we are appending.
*/
if (ioflag & (IO_APPEND | IO_SYNC)) {
mtx_lock(&np->n_mtx);
if (np->n_flag & NMODIFIED) {
mtx_unlock(&np->n_mtx);
#ifdef notyet /* Needs matching nonblock semantics elsewhere, too. */
/*
* Require non-blocking, synchronous writes to
@ -857,7 +920,8 @@ nfs_write(struct vop_write_args *ap)
error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
if (error)
return (error);
}
} else
mtx_unlock(&np->n_mtx);
}
/*
@ -869,7 +933,9 @@ nfs_write(struct vop_write_args *ap)
error = VOP_GETATTR(vp, &vattr, cred, td);
if (error)
return (error);
mtx_lock(&np->n_mtx);
uio->uio_offset = np->n_size;
mtx_unlock(&np->n_mtx);
}
if (uio->uio_offset < 0)
@ -907,6 +973,11 @@ nfs_write(struct vop_write_args *ap)
* no point optimizing for something that really won't ever happen.
*/
if (!(ioflag & IO_SYNC)) {
int nflag;
mtx_lock(&np->n_mtx);
nflag = np->n_flag;
mtx_unlock(&np->n_mtx);
int needrestart = 0;
if (nmp->nm_wcommitsize < uio->uio_resid) {
/*
@ -918,9 +989,9 @@ nfs_write(struct vop_write_args *ap)
if (ioflag & IO_NDELAY)
return (EAGAIN);
ioflag |= IO_SYNC;
if (np->n_flag & NMODIFIED)
if (nflag & NMODIFIED)
needrestart = 1;
} else if (np->n_flag & NMODIFIED) {
} else if (nflag & NMODIFIED) {
int wouldcommit = 0;
BO_LOCK(&vp->v_bufobj);
if (vp->v_bufobj.bo_dirty.bv_cnt != 0) {
@ -961,8 +1032,9 @@ nfs_write(struct vop_write_args *ap)
* Handle direct append and file extension cases, calculate
* unaligned buffer size.
*/
mtx_lock(&np->n_mtx);
if (uio->uio_offset == np->n_size && n) {
mtx_unlock(&np->n_mtx);
/*
* Get the buffer (in its pre-append state to maintain
* B_CACHE if it was previously set). Resize the
@ -975,9 +1047,11 @@ nfs_write(struct vop_write_args *ap)
if (bp != NULL) {
long save;
mtx_lock(&np->n_mtx);
np->n_size = uio->uio_offset + n;
np->n_flag |= NMODIFIED;
vnode_pager_setsize(vp, np->n_size);
mtx_unlock(&np->n_mtx);
save = bp->b_flags & B_CACHE;
bcount += n;
@ -996,12 +1070,15 @@ nfs_write(struct vop_write_args *ap)
else
bcount = np->n_size - (off_t)lbn * biosize;
}
mtx_unlock(&np->n_mtx);
bp = nfs_getcacheblk(vp, lbn, bcount, td);
mtx_lock(&np->n_mtx);
if (uio->uio_offset + n > np->n_size) {
np->n_size = uio->uio_offset + n;
np->n_flag |= NMODIFIED;
vnode_pager_setsize(vp, np->n_size);
}
mtx_unlock(&np->n_mtx);
}
if (!bp) {
@ -1047,7 +1124,9 @@ nfs_write(struct vop_write_args *ap)
}
if (bp->b_wcred == NOCRED)
bp->b_wcred = crhold(cred);
mtx_lock(&np->n_mtx);
np->n_flag |= NMODIFIED;
mtx_unlock(&np->n_mtx);
/*
* If dirtyend exceeds file size, chop it down. This should
@ -1059,7 +1138,7 @@ nfs_write(struct vop_write_args *ap)
*/
if (bp->b_dirtyend > bcount) {
printf("NFS append race @%lx:%d\n",
nfs_printf("NFS append race @%lx:%d\n",
(long)bp->b_blkno * DEV_BSIZE,
bp->b_dirtyend - bcount);
bp->b_dirtyend = bcount;
@ -1139,7 +1218,7 @@ nfs_write(struct vop_write_args *ap)
break;
} else if ((n + on) == biosize) {
bp->b_flags |= B_ASYNC;
(void) (nmp->nm_rpcops->nr_writebp)(bp, 0, 0);
(void) (nmp->nm_rpcops->nr_writebp)(bp, 0, NULL);
} else {
bdwrite(bp);
}
@ -1229,15 +1308,7 @@ nfs_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg)
slptimeo = 0;
}
if ((old_lock = VOP_ISLOCKED(vp, td)) != LK_EXCLUSIVE) {
if (old_lock == LK_SHARED) {
/* Upgrade to exclusive lock, this might block */
vn_lock(vp, LK_UPGRADE | LK_RETRY, td);
} else {
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
}
}
old_lock = nfs_upgrade_vnlock(vp, td);
/*
* Now, flush as required.
*/
@ -1247,17 +1318,12 @@ nfs_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg)
goto out;
error = vinvalbuf(vp, flags, td, 0, slptimeo);
}
mtx_lock(&np->n_mtx);
if (np->n_directio_asyncwr == 0)
np->n_flag &= ~NMODIFIED;
mtx_unlock(&np->n_mtx);
out:
if (old_lock != LK_EXCLUSIVE) {
if (old_lock == LK_SHARED) {
/* Downgrade from exclusive lock, this might block */
vn_lock(vp, LK_DOWNGRADE, td);
} else {
VOP_UNLOCK(vp, 0, td);
}
}
nfs_downgrade_vnlock(vp, td, old_lock);
return error;
}
@ -1283,11 +1349,12 @@ nfs_asyncio(struct nfsmount *nmp, struct buf *bp, struct ucred *cred, struct thr
* leave the async daemons for more important rpc's (such as reads
* and writes).
*/
mtx_lock(&nfs_iod_mtx);
if (bp->b_iocmd == BIO_WRITE && (bp->b_flags & B_NEEDCOMMIT) &&
(nmp->nm_bufqiods > nfs_numasync / 2)) {
mtx_unlock(&nfs_iod_mtx);
return(EIO);
}
again:
if (nmp->nm_flag & NFSMNT_INT)
slpflag = PCATCH;
@ -1350,12 +1417,15 @@ nfs_asyncio(struct nfsmount *nmp, struct buf *bp, struct ucred *cred, struct thr
NFS_DPF(ASYNCIO,
("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
nmp->nm_bufqwant = TRUE;
error = nfs_tsleep(td, &nmp->nm_bufq, slpflag | PRIBIO,
error = nfs_msleep(td, &nmp->nm_bufq, &nfs_iod_mtx,
slpflag | PRIBIO,
"nfsaio", slptimeo);
if (error) {
error2 = nfs_sigintr(nmp, NULL, td);
if (error2)
if (error2) {
mtx_unlock(&nfs_iod_mtx);
return (error2);
}
if (slpflag == PCATCH) {
slpflag = 0;
slptimeo = 2 * hz;
@ -1385,11 +1455,17 @@ nfs_asyncio(struct nfsmount *nmp, struct buf *bp, struct ucred *cred, struct thr
BUF_KERNPROC(bp);
TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
nmp->nm_bufqlen++;
if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE)
if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) {
mtx_lock(&(VTONFS(bp->b_vp))->n_mtx);
VTONFS(bp->b_vp)->n_directio_asyncwr++;
mtx_unlock(&(VTONFS(bp->b_vp))->n_mtx);
}
mtx_unlock(&nfs_iod_mtx);
return (0);
}
mtx_unlock(&nfs_iod_mtx);
/*
* All the iods are busy on other mounts, so return EIO to
* force the caller to process the i/o synchronously.
@ -1415,12 +1491,13 @@ nfs_doio_directwrite(struct buf *bp)
free(uiop, M_NFSDIRECTIO);
if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) {
struct nfsnode *np = VTONFS(bp->b_vp);
mtx_lock(&np->n_mtx);
np->n_directio_asyncwr--;
if ((np->n_flag & NFSYNCWAIT) && np->n_directio_asyncwr == 0) {
np->n_flag &= ~NFSYNCWAIT;
wakeup((caddr_t)&np->n_directio_asyncwr);
}
mtx_unlock(&np->n_mtx);
}
vdrop(bp->b_vp);
bp->b_vp = NULL;
@ -1441,7 +1518,8 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
struct uio uio;
struct iovec io;
struct proc *p = td ? td->td_proc : NULL;
uint8_t iocmd;
np = VTONFS(vp);
nmp = VFSTONFS(vp->v_mount);
uiop = &uio;
@ -1459,8 +1537,8 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
bp->b_ioflags &= ~BIO_ERROR;
KASSERT(!(bp->b_flags & B_DONE), ("nfs_doio: bp %p already marked done", bp));
if (bp->b_iocmd == BIO_READ) {
iocmd = bp->b_iocmd;
if (iocmd == BIO_READ) {
io.iov_len = uiop->uio_resid = bp->b_bcount;
io.iov_base = bp->b_data;
uiop->uio_rw = UIO_READ;
@ -1490,11 +1568,15 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
}
}
/* ASSERT_VOP_LOCKED(vp, "nfs_doio"); */
if (p && (vp->v_vflag & VV_TEXT) &&
(NFS_TIMESPEC_COMPARE(&np->n_mtime, &np->n_vattr.va_mtime))) {
PROC_LOCK(p);
killproc(p, "text file modification");
PROC_UNLOCK(p);
if (p && (vp->v_vflag & VV_TEXT)) {
mtx_lock(&np->n_mtx);
if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &np->n_vattr.va_mtime)) {
mtx_unlock(&np->n_mtx);
PROC_LOCK(p);
killproc(p, "text file modification");
PROC_UNLOCK(p);
} else
mtx_unlock(&np->n_mtx);
}
break;
case VLNK:
@ -1524,7 +1606,7 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
bp->b_flags |= B_INVAL;
break;
default:
printf("nfs_doio: type %x unexpected\n", vp->v_type);
nfs_printf("nfs_doio: type %x unexpected\n", vp->v_type);
break;
};
if (error) {
@ -1558,9 +1640,10 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
/*
* Setup for actual write
*/
mtx_lock(&np->n_mtx);
if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
mtx_unlock(&np->n_mtx);
if (bp->b_dirtyend > bp->b_dirtyoff) {
io.iov_len = uiop->uio_resid = bp->b_dirtyend
@ -1635,7 +1718,9 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td)
if (error) {
bp->b_ioflags |= BIO_ERROR;
bp->b_error = np->n_error = error;
mtx_lock(&np->n_mtx);
np->n_flag |= NWRITEERR;
mtx_unlock(&np->n_mtx);
}
bp->b_dirtyoff = bp->b_dirtyend = 0;
}
@ -1664,13 +1749,16 @@ int
nfs_meta_setsize(struct vnode *vp, struct ucred *cred, struct thread *td, u_quad_t nsize)
{
struct nfsnode *np = VTONFS(vp);
u_quad_t tsize = np->n_size;
u_quad_t tsize;
int biosize = vp->v_mount->mnt_stat.f_iosize;
int error = 0;
mtx_lock(&np->n_mtx);
tsize = np->n_size;
np->n_size = nsize;
mtx_unlock(&np->n_mtx);
if (np->n_size < tsize) {
if (nsize < tsize) {
struct buf *bp;
daddr_t lbn;
int bufsize;
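
nfs_meta_setsize() now snapshots n_size into a local under n_mtx and compares the locals (nsize < tsize) rather than re-reading np->n_size unlocked as the old code did. The snapshot idiom in isolation, with a hypothetical shrink_buffers() for the truncation work:

static void shrink_buffers(struct vnode *, u_quad_t);	/* hypothetical */

static void
set_size_locked(struct vnode *vp, u_quad_t nsize)
{
	struct nfsnode *np = VTONFS(vp);
	u_quad_t tsize;

	mtx_lock(&np->n_mtx);
	tsize = np->n_size;	/* consistent snapshot of the old size */
	np->n_size = nsize;	/* publish the new size under the lock */
	mtx_unlock(&np->n_mtx);
	if (nsize < tsize)	/* compare snapshots, not live fields */
		shrink_buffers(vp, nsize);
}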

View File

@ -102,17 +102,22 @@ sysctl_iodmin(SYSCTL_HANDLER_ARGS)
error = sysctl_handle_int(oidp, &newmin, 0, req);
if (error || (req->newptr == NULL))
return (error);
if (newmin > nfs_iodmax)
return (EINVAL);
mtx_lock(&nfs_iod_mtx);
if (newmin > nfs_iodmax) {
error = EINVAL;
goto out;
}
nfs_iodmin = newmin;
if (nfs_numasync >= nfs_iodmin)
return (0);
goto out;
/*
* If the current number of nfsiod is lower
* than the new minimum, create some more.
*/
for (i = nfs_iodmin - nfs_numasync; i > 0; i--)
nfs_nfsiodnew();
out:
mtx_unlock(&nfs_iod_mtx);
return (0);
}
SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmin, CTLTYPE_UINT | CTLFLAG_RW, 0,
@ -131,9 +136,10 @@ sysctl_iodmax(SYSCTL_HANDLER_ARGS)
return (error);
if (newmax > NFS_MAXASYNCDAEMON)
return (EINVAL);
mtx_lock(&nfs_iod_mtx);
nfs_iodmax = newmax;
if (nfs_numasync <= nfs_iodmax)
return (0);
goto out;
/*
* If there are some asleep nfsiods that should
* exit, wakeup() them so that they check nfs_iodmax
@ -146,6 +152,8 @@ sysctl_iodmax(SYSCTL_HANDLER_ARGS)
wakeup(&nfs_iodwant[iod]);
iod--;
}
out:
mtx_unlock(&nfs_iod_mtx);
return (0);
}
SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmax, CTLTYPE_UINT | CTLFLAG_RW, 0,
@ -168,8 +176,10 @@ nfs_nfsiodnew(void)
}
if (newiod == -1)
return (-1);
mtx_unlock(&nfs_iod_mtx);
error = kthread_create(nfssvc_iod, nfs_asyncdaemon + i, NULL, RFHIGHPID,
0, "nfsiod %d", newiod);
mtx_lock(&nfs_iod_mtx);
if (error)
return (-1);
nfs_numasync++;
@ -183,6 +193,7 @@ nfsiod_setup(void *dummy)
int error;
TUNABLE_INT_FETCH("vfs.nfs.iodmin", &nfs_iodmin);
mtx_lock(&nfs_iod_mtx);
/* Silently limit the start number of nfsiod's */
if (nfs_iodmin > NFS_MAXASYNCDAEMON)
nfs_iodmin = NFS_MAXASYNCDAEMON;
@ -192,6 +203,7 @@ nfsiod_setup(void *dummy)
if (error == -1)
panic("nfsiod_setup: nfs_nfsiodnew failed");
}
mtx_unlock(&nfs_iod_mtx);
}
SYSINIT(nfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL);
@ -211,7 +223,7 @@ nfssvc_iod(void *instance)
int myiod, timo;
int error = 0;
mtx_lock(&Giant);
mtx_lock(&nfs_iod_mtx);
myiod = (int *)instance - nfs_asyncdaemon;
/*
* Main loop
@ -230,7 +242,7 @@ nfssvc_iod(void *instance)
* Always keep at least nfs_iodmin kthreads.
*/
timo = (myiod < nfs_iodmin) ? 0 : nfs_iodmaxidle * hz;
error = tsleep(&nfs_iodwant[myiod], PWAIT | PCATCH,
error = msleep(&nfs_iodwant[myiod], &nfs_iod_mtx, PWAIT | PCATCH,
"-", timo);
}
if (error)
@ -243,6 +255,7 @@ nfssvc_iod(void *instance)
nmp->nm_bufqwant = 0;
wakeup(&nmp->nm_bufq);
}
mtx_unlock(&nfs_iod_mtx);
if (bp->b_flags & B_DIRECT) {
KASSERT((bp->b_iocmd == BIO_WRITE), ("nfscvs_iod: BIO_WRITE not set"));
(void)nfs_doio_directwrite(bp);
@ -252,7 +265,7 @@ nfssvc_iod(void *instance)
else
(void) nfs_doio(bp->b_vp, bp, bp->b_wcred, NULL);
}
mtx_lock(&nfs_iod_mtx);
/*
* If there are more than one iod on this mount, then defect
* so that the iods can be shared out fairly between the mounts
@ -276,7 +289,7 @@ nfssvc_iod(void *instance)
/* Someone may be waiting for the last nfsiod to terminate. */
if (--nfs_numasync == 0)
wakeup(&nfs_numasync);
mtx_unlock(&Giant);
mtx_unlock(&nfs_iod_mtx);
if ((error == 0) || (error == EWOULDBLOCK))
kthread_exit(0);
/* Abnormal termination */
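
nfssvc_iod() now sleeps via msleep() on nfs_iod_mtx instead of tsleep() under Giant, so testing the work-queue predicate and blocking are atomic with respect to wakeup(). A condensed sketch of the resulting worker loop (queue_empty(), dequeue() and process() are hypothetical stand-ins; bp, error, timo and myiod are as in nfssvc_iod()):

mtx_lock(&nfs_iod_mtx);
for (;;) {
	while (queue_empty() && error == 0)
		error = msleep(&nfs_iodwant[myiod], &nfs_iod_mtx,
		    PWAIT | PCATCH, "-", timo);
	if (error)
		break;
	bp = dequeue();			/* still under the iod lock */
	mtx_unlock(&nfs_iod_mtx);
	process(bp);			/* do the actual I/O unlocked */
	mtx_lock(&nfs_iod_mtx);
}
mtx_unlock(&nfs_iod_mtx);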

View File

@ -164,6 +164,7 @@ nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp)
np->n_fhp = &np->n_fh;
bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
np->n_fhsize = fhsize;
mtx_init(&np->n_mtx, "NFSnode lock", NULL, MTX_DEF);
*npp = np;
return (0);
@ -234,7 +235,7 @@ nfs_reclaim(struct vop_reclaim_args *ap)
if (np->n_fhsize > NFS_SMALLFH) {
FREE((caddr_t)np->n_fhp, M_NFSBIGFH);
}
mtx_destroy(&np->n_mtx);
uma_zfree(nfsnode_zone, vp->v_data);
vp->v_data = NULL;
return (0);

View File

@ -115,7 +115,7 @@ static int nfs_realign_test;
static int nfs_realign_count;
static int nfs_bufpackets = 4;
static int nfs_reconnects;
static int nfs3_jukebox_delay = 10;
static int nfs3_jukebox_delay = 10;
SYSCTL_DECL(_vfs_nfs);
@ -125,8 +125,7 @@ SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, "");
SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0,
"number of times the nfs client has had to reconnect");
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0,
"number of seconds to delay a retry after receiving EJUKEBOX");
"number of seconds to delay a retry after receiving EJUKEBOX");
/*
* There is a congestion window for outstanding rpcs maintained per mount
@ -154,10 +153,8 @@ static void nfs_softterm(struct nfsreq *rep);
static int nfs_reconnect(struct nfsreq *rep);
static void nfs_clnt_tcp_soupcall(struct socket *so, void *arg, int waitflag);
static void nfs_clnt_udp_soupcall(struct socket *so, void *arg, int waitflag);
static void wakeup_nfsreq(struct nfsreq *req);
extern struct mtx nfs_reqq_mtx;
extern struct mtx nfs_reply_mtx;
/*
* Initialize sockets and congestion for a new NFS connection.
@ -172,13 +169,13 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
struct sockaddr *saddr;
struct thread *td = &thread0; /* only used for socreate and sobind */
NET_ASSERT_GIANT();
NET_LOCK_GIANT();
if (nmp->nm_sotype == SOCK_STREAM) {
mtx_lock(&nmp->nm_nfstcpstate.mtx);
mtx_lock(&nmp->nm_mtx);
nmp->nm_nfstcpstate.flags |= NFS_TCP_EXPECT_RPCMARKER;
nmp->nm_nfstcpstate.rpcresid = 0;
mtx_unlock(&nmp->nm_nfstcpstate.mtx);
mtx_unlock(&nmp->nm_mtx);
}
nmp->nm_so = NULL;
saddr = nmp->nm_nam;
@ -243,12 +240,16 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
* Protocols that do not require connections may be optionally left
* unconnected for servers that reply from a port other than NFS_PORT.
*/
mtx_lock(&nmp->nm_mtx);
if (nmp->nm_flag & NFSMNT_NOCONN) {
if (nmp->nm_soflags & PR_CONNREQUIRED) {
error = ENOTCONN;
mtx_unlock(&nmp->nm_mtx);
goto bad;
}
} else
mtx_unlock(&nmp->nm_mtx);
} else {
mtx_unlock(&nmp->nm_mtx);
error = soconnect(so, nmp->nm_nam, td);
if (error)
goto bad;
@ -290,7 +291,7 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
pktscale = 2;
if (pktscale > 64)
pktscale = 64;
mtx_lock(&nmp->nm_mtx);
if (nmp->nm_sotype == SOCK_DGRAM) {
sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
@ -313,7 +314,9 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
sopt.sopt_val = &val;
sopt.sopt_valsize = sizeof val;
val = 1;
mtx_unlock(&nmp->nm_mtx);
sosetopt(so, &sopt);
mtx_lock(&nmp->nm_mtx);
}
if (so->so_proto->pr_protocol == IPPROTO_TCP) {
struct sockopt sopt;
@ -326,13 +329,16 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
sopt.sopt_val = &val;
sopt.sopt_valsize = sizeof val;
val = 1;
mtx_unlock(&nmp->nm_mtx);
sosetopt(so, &sopt);
mtx_lock(&nmp->nm_mtx);
}
sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
sizeof (u_int32_t)) * pktscale;
rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
sizeof (u_int32_t)) * pktscale;
}
mtx_unlock(&nmp->nm_mtx);
error = soreserve(so, sndreserve, rcvreserve);
if (error)
goto bad;
@ -349,6 +355,7 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
so->so_snd.sb_flags |= SB_NOINTR;
SOCKBUF_UNLOCK(&so->so_snd);
mtx_lock(&nmp->nm_mtx);
/* Initialize other non-zero congestion variables */
nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
nmp->nm_srtt[3] = (NFS_TIMEO << 3);
@ -357,10 +364,13 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
nmp->nm_sent = 0;
nmp->nm_timeouts = 0;
mtx_unlock(&nmp->nm_mtx);
NET_UNLOCK_GIANT();
return (0);
bad:
nfs_disconnect(nmp);
NET_UNLOCK_GIANT();
return (error);
}
@ -387,7 +397,9 @@ nfs_reconnect(struct nfsreq *rep)
error = EINTR;
if (error == EIO || error == EINTR)
return (error);
mtx_lock(&Giant);
(void) tsleep(&lbolt, PSOCK, "nfscon", 0);
mtx_unlock(&Giant);
}
/*
@ -399,9 +411,10 @@ nfs_reconnect(struct nfsreq *rep)
* until the connection is established successfully, and
* then re-transmit the request.
*/
mtx_lock(&nmp->nm_nfstcpstate.mtx);
mtx_lock(&nmp->nm_mtx);
nmp->nm_nfstcpstate.flags &= ~NFS_TCP_FORCE_RECONNECT;
mtx_unlock(&nmp->nm_nfstcpstate.mtx);
nmp->nm_nfstcpstate.rpcresid = 0;
mtx_unlock(&nmp->nm_mtx);
/*
* Loop through outstanding request list and fix up all requests
@ -409,8 +422,11 @@ nfs_reconnect(struct nfsreq *rep)
*/
mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
if (rp->r_nmp == nmp)
if (rp->r_nmp == nmp) {
mtx_lock(&rp->r_mtx);
rp->r_flags |= R_MUSTRESEND;
mtx_unlock(&rp->r_mtx);
}
}
mtx_unlock(&nfs_reqq_mtx);
return (0);
@ -426,9 +442,11 @@ nfs_disconnect(struct nfsmount *nmp)
NET_ASSERT_GIANT();
mtx_lock(&nmp->nm_mtx);
if (nmp->nm_so) {
so = nmp->nm_so;
nmp->nm_so = NULL;
mtx_unlock(&nmp->nm_mtx);
SOCKBUF_LOCK(&so->so_rcv);
so->so_upcallarg = NULL;
so->so_upcall = NULL;
@ -436,7 +454,8 @@ nfs_disconnect(struct nfsmount *nmp)
SOCKBUF_UNLOCK(&so->so_rcv);
soshutdown(so, SHUT_WR);
soclose(so);
}
} else
mtx_unlock(&nmp->nm_mtx);
}
void
@ -463,22 +482,29 @@ nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top,
struct sockaddr *sendnam;
int error, error2, soflags, flags;
NET_ASSERT_GIANT();
NET_LOCK_GIANT();
KASSERT(rep, ("nfs_send: called with rep == NULL"));
error = nfs_sigintr(rep->r_nmp, rep, rep->r_td);
if (error) {
m_freem(top);
return (error);
goto out;
}
mtx_lock(&rep->r_nmp->nm_mtx);
mtx_lock(&rep->r_mtx);
if ((so = rep->r_nmp->nm_so) == NULL) {
rep->r_flags |= R_MUSTRESEND;
mtx_unlock(&rep->r_mtx);
mtx_unlock(&rep->r_nmp->nm_mtx);
m_freem(top);
return (0);
error = 0;
goto out;
}
rep->r_flags &= ~R_MUSTRESEND;
soflags = rep->r_nmp->nm_soflags;
mtx_unlock(&rep->r_mtx);
mtx_unlock(&rep->r_nmp->nm_mtx);
if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
sendnam = NULL;
@ -493,7 +519,9 @@ nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top,
flags, curthread /*XXX*/);
if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
error = 0;
mtx_lock(&rep->r_mtx);
rep->r_flags |= R_MUSTRESEND;
mtx_unlock(&rep->r_mtx);
}
if (error) {
@ -513,8 +541,11 @@ nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top,
error2 = NFS_SIGREP(rep);
if (error2)
error = error2;
else
else {
mtx_lock(&rep->r_mtx);
rep->r_flags |= R_MUSTRESEND;
mtx_unlock(&rep->r_mtx);
}
/*
* Handle any recoverable (soft) socket errors here. (?)
@ -523,6 +554,8 @@ nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top,
error != EWOULDBLOCK && error != EPIPE)
error = 0;
}
out:
NET_UNLOCK_GIANT();
return (error);
}
@ -533,7 +566,7 @@ nfs_reply(struct nfsreq *rep)
register struct mbuf *m;
int error = 0, sotype, slpflag;
NET_ASSERT_GIANT();
NET_LOCK_GIANT();
sotype = rep->r_nmp->nm_sotype;
/*
@ -543,30 +576,39 @@ nfs_reply(struct nfsreq *rep)
if (sotype != SOCK_DGRAM) {
error = nfs_sndlock(rep);
if (error)
return (error);
goto out;
tryagain:
mtx_lock(&rep->r_nmp->nm_mtx);
mtx_lock(&rep->r_mtx);
if (rep->r_mrep) {
mtx_unlock(&rep->r_mtx);
mtx_unlock(&rep->r_nmp->nm_mtx);
nfs_sndunlock(rep);
return (0);
error = 0;
goto out;
}
if (rep->r_flags & R_SOFTTERM) {
mtx_unlock(&rep->r_mtx);
mtx_unlock(&rep->r_nmp->nm_mtx);
nfs_sndunlock(rep);
return (EINTR);
error = EINTR;
goto out;
}
so = rep->r_nmp->nm_so;
mtx_lock(&rep->r_nmp->nm_nfstcpstate.mtx);
if (!so ||
(rep->r_nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT)) {
mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx);
mtx_unlock(&rep->r_mtx);
mtx_unlock(&rep->r_nmp->nm_mtx);
error = nfs_reconnect(rep);
if (error) {
nfs_sndunlock(rep);
return (error);
goto out;
}
goto tryagain;
} else
mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx);
}
while (rep->r_flags & R_MUSTRESEND) {
mtx_unlock(&rep->r_mtx);
mtx_unlock(&rep->r_nmp->nm_mtx);
m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
nfsstats.rpcretries++;
error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
@ -574,41 +616,59 @@ nfs_reply(struct nfsreq *rep)
if (error == EINTR || error == ERESTART ||
(error = nfs_reconnect(rep)) != 0) {
nfs_sndunlock(rep);
return (error);
goto out;
}
goto tryagain;
}
mtx_lock(&rep->r_nmp->nm_mtx);
mtx_lock(&rep->r_mtx);
}
mtx_unlock(&rep->r_nmp->nm_mtx);
mtx_unlock(&rep->r_mtx);
nfs_sndunlock(rep);
}
slpflag = 0;
mtx_lock(&rep->r_nmp->nm_mtx);
if (rep->r_nmp->nm_flag & NFSMNT_INT)
slpflag = PCATCH;
mtx_lock(&nfs_reply_mtx);
mtx_unlock(&rep->r_nmp->nm_mtx);
mtx_lock(&rep->r_mtx);
while ((rep->r_mrep == NULL) && (error == 0) &&
((rep->r_flags & R_SOFTTERM) == 0) &&
((sotype == SOCK_DGRAM) || ((rep->r_flags & R_MUSTRESEND) == 0)))
error = msleep((caddr_t)rep, &nfs_reply_mtx,
error = msleep((caddr_t)rep, &rep->r_mtx,
slpflag | (PZERO - 1), "nfsreq", 0);
mtx_unlock(&nfs_reply_mtx);
if (error == EINTR || error == ERESTART)
if (error == EINTR || error == ERESTART) {
/* NFS operations aren't restartable. Map ERESTART to EINTR */
return (EINTR);
if (rep->r_flags & R_SOFTTERM)
error = EINTR;
mtx_unlock(&rep->r_mtx);
goto out;
}
if (rep->r_flags & R_SOFTTERM) {
/* Request was terminated because we exceeded the retries (soft mount) */
return (ETIMEDOUT);
error = ETIMEDOUT;
mtx_unlock(&rep->r_mtx);
goto out;
}
mtx_unlock(&rep->r_mtx);
if (sotype == SOCK_STREAM) {
mtx_lock(&rep->r_nmp->nm_nfstcpstate.mtx);
mtx_lock(&rep->r_nmp->nm_mtx);
mtx_lock(&rep->r_mtx);
if (((rep->r_nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT) ||
(rep->r_flags & R_MUSTRESEND))) {
mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx);
mtx_unlock(&rep->r_mtx);
mtx_unlock(&rep->r_nmp->nm_mtx);
error = nfs_sndlock(rep);
if (error)
return (error);
goto out;
goto tryagain;
} else
mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx);
} else {
mtx_unlock(&rep->r_mtx);
mtx_unlock(&rep->r_nmp->nm_mtx);
}
}
out:
NET_UNLOCK_GIANT();
return (error);
}
@ -660,6 +720,8 @@ nfs_clnt_match_xid(struct socket *so,
* Iff no match, just drop the datagram
*/
TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
mtx_lock(&nmp->nm_mtx);
mtx_lock(&rep->r_mtx);
if (rep->r_mrep == NULL && rxid == rep->r_xid) {
/* Found it.. */
rep->r_mrep = mrep;
@ -703,8 +765,13 @@ nfs_clnt_match_xid(struct socket *so,
NFS_SDRTT(rep) += t1;
}
nmp->nm_timeouts = 0;
wakeup((caddr_t)rep);
mtx_unlock(&rep->r_mtx);
mtx_unlock(&nmp->nm_mtx);
break;
}
mtx_unlock(&rep->r_mtx);
mtx_unlock(&nmp->nm_mtx);
}
/*
* If not matched to a request, drop it.
@ -713,31 +780,18 @@ nfs_clnt_match_xid(struct socket *so,
if (rep == 0) {
nfsstats.rpcunexpected++;
m_freem(mrep);
} else
wakeup_nfsreq(rep);
}
mtx_unlock(&nfs_reqq_mtx);
}
/*
* The wakeup of the requestor should be done under the mutex
* to avoid potential missed wakeups.
*/
static void
wakeup_nfsreq(struct nfsreq *req)
{
mtx_lock(&nfs_reply_mtx);
wakeup((caddr_t)req);
mtx_unlock(&nfs_reply_mtx);
}
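
With every sleeper in nfs_reply() now blocking via msleep() on its own r_mtx, the dedicated nfs_reply_mtx and this wakeup_nfsreq() helper become unnecessary: a waker that sets the condition and calls wakeup() while holding r_mtx cannot lose a race against the sleeper's predicate test. Schematically (condition() stands in for the r_mrep / R_SOFTTERM / R_MUSTRESEND tests):

/* waker */
mtx_lock(&rep->r_mtx);
rep->r_flags |= R_MUSTRESEND;	/* establish the condition ... */
wakeup((caddr_t)rep);		/* ... and signal it, under the same lock */
mtx_unlock(&rep->r_mtx);

/* sleeper */
mtx_lock(&rep->r_mtx);
while (!condition(rep))
	msleep((caddr_t)rep, &rep->r_mtx, PZERO - 1, "nfsreq", 0);
mtx_unlock(&rep->r_mtx);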
static void
nfs_mark_for_reconnect(struct nfsmount *nmp)
{
struct nfsreq *rp;
mtx_lock(&nmp->nm_nfstcpstate.mtx);
mtx_lock(&nmp->nm_mtx);
nmp->nm_nfstcpstate.flags |= NFS_TCP_FORCE_RECONNECT;
mtx_unlock(&nmp->nm_nfstcpstate.mtx);
mtx_unlock(&nmp->nm_mtx);
/*
* Wakeup all processes that are waiting for replies
* on this mount point. One of them does the reconnect.
@ -745,8 +799,10 @@ nfs_mark_for_reconnect(struct nfsmount *nmp)
mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
if (rp->r_nmp == nmp) {
mtx_lock(&rp->r_mtx);
rp->r_flags |= R_MUSTRESEND;
wakeup_nfsreq(rp);
wakeup((caddr_t)rp);
mtx_unlock(&rp->r_mtx);
}
}
mtx_unlock(&nfs_reqq_mtx);
@ -795,19 +851,21 @@ nfs_clnt_tcp_soupcall(struct socket *so, void *arg, int waitflag)
* Don't pick any more data from the socket if we've marked the
* mountpoint for reconnect.
*/
mtx_lock(&nmp->nm_nfstcpstate.mtx);
mtx_lock(&nmp->nm_mtx);
if (nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT) {
mtx_unlock(&nmp->nm_nfstcpstate.mtx);
mtx_unlock(&nmp->nm_mtx);
return;
} else
mtx_unlock(&nmp->nm_nfstcpstate.mtx);
mtx_unlock(&nmp->nm_mtx);
auio.uio_td = curthread;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_rw = UIO_READ;
for ( ; ; ) {
mtx_lock(&nmp->nm_mtx);
if (nmp->nm_nfstcpstate.flags & NFS_TCP_EXPECT_RPCMARKER) {
int resid;
mtx_unlock(&nmp->nm_mtx);
if (!nfstcp_marker_readable(so)) {
/* Marker is not readable */
return;
@ -864,14 +922,20 @@ nfs_clnt_tcp_soupcall(struct socket *so, void *arg, int waitflag)
nmp->nm_mountp->mnt_stat.f_mntfromname);
goto mark_reconnect;
}
mtx_lock(&nmp->nm_mtx);
nmp->nm_nfstcpstate.rpcresid = len;
nmp->nm_nfstcpstate.flags &= ~(NFS_TCP_EXPECT_RPCMARKER);
}
mtx_unlock(&nmp->nm_mtx);
} else
mtx_unlock(&nmp->nm_mtx);
/*
* Processed RPC marker or no RPC marker to process.
* Pull in and process data.
*/
mtx_lock(&nmp->nm_mtx);
if (nmp->nm_nfstcpstate.rpcresid > 0) {
mtx_unlock(&nmp->nm_mtx);
if (!nfstcp_readable(so, nmp->nm_nfstcpstate.rpcresid)) {
/* All data not readable */
return;
@ -894,11 +958,14 @@ nfs_clnt_tcp_soupcall(struct socket *so, void *arg, int waitflag)
}
if (mp == NULL)
panic("nfs_clnt_tcp_soupcall: Got empty mbuf chain from sorecv\n");
mtx_lock(&nmp->nm_mtx);
nmp->nm_nfstcpstate.rpcresid = 0;
nmp->nm_nfstcpstate.flags |= NFS_TCP_EXPECT_RPCMARKER;
mtx_unlock(&nmp->nm_mtx);
/* We got the entire RPC reply. Match XIDs and wake up requestor */
nfs_clnt_match_xid(so, nmp, mp);
}
} else
mtx_unlock(&nmp->nm_mtx);
}
mark_reconnect:
@ -953,7 +1020,7 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
struct mbuf *m, *md, *mheadend;
time_t waituntil;
caddr_t dpos;
int s, error = 0, mrest_len, auth_len, auth_type;
int error = 0, mrest_len, auth_len, auth_type;
struct timeval now;
u_int32_t *xidp;
@ -966,11 +1033,12 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
if ((nmp->nm_flag & NFSMNT_NFSV4) != 0)
return nfs4_request(vp, mrest, procnum, td, cred, mrp, mdp, dposp);
MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
rep->r_mrep = rep->r_md = NULL;
bzero(rep, sizeof(struct nfsreq));
rep->r_nmp = nmp;
rep->r_vp = vp;
rep->r_td = td;
rep->r_procnum = procnum;
mtx_init(&rep->r_mtx, "NFSrep lock", NULL, MTX_DEF);
getmicrouptime(&now);
rep->r_lastmsg = now.tv_sec -
@ -1019,7 +1087,6 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
* Chain request into list of outstanding requests. Be sure
* to put it LAST so timer finds oldest requests first.
*/
s = splsoftclock();
mtx_lock(&nfs_reqq_mtx);
if (TAILQ_EMPTY(&nfs_reqq))
callout_reset(&nfs_callout, nfs_ticks, nfs_timer, NULL);
@ -1031,10 +1098,11 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
* send this one now but let timer do it. If not timing a request,
* do it now.
*/
mtx_lock(&nmp->nm_mtx);
if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
nmp->nm_sent < nmp->nm_cwnd)) {
splx(s);
mtx_unlock(&nmp->nm_mtx);
error = nfs_sndlock(rep);
if (!error) {
m2 = m_copym(m, 0, M_COPYALL, M_TRYWAIT);
@ -1047,12 +1115,14 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
* blocking on nfs_send() too long, so check for R_SENT here.
*/
if (!error && (rep->r_flags & (R_SENT | R_MUSTRESEND)) == 0) {
mtx_lock(&nmp->nm_mtx);
nmp->nm_sent += NFS_CWNDSCALE;
mtx_unlock(&nmp->nm_mtx);
rep->r_flags |= R_SENT;
}
mtx_unlock(&nfs_reqq_mtx);
} else {
splx(s);
mtx_unlock(&nmp->nm_mtx);
rep->r_rtt = -1;
}
@ -1065,7 +1135,6 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
/*
* RPC done, unlink the request.
*/
s = splsoftclock();
mtx_lock(&nfs_reqq_mtx);
/*
* nfs_timer() may be in the process of re-transmitting this request.
@ -1086,10 +1155,11 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
*/
if (rep->r_flags & R_SENT) {
rep->r_flags &= ~R_SENT; /* paranoia */
mtx_lock(&nmp->nm_mtx);
nmp->nm_sent -= NFS_CWNDSCALE;
mtx_unlock(&nmp->nm_mtx);
}
mtx_unlock(&nfs_reqq_mtx);
splx(s);
/*
* If there was a successful reply and a tprintf msg.
@ -1113,6 +1183,7 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
if (rep->r_mrep != NULL)
m_freem(rep->r_mrep);
m_freem(rep->r_mreq);
mtx_destroy(&rep->r_mtx);
free((caddr_t)rep, M_NFSREQ);
return (error);
}
@ -1131,6 +1202,7 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
error = EACCES;
m_freem(mrep);
m_freem(rep->r_mreq);
mtx_destroy(&rep->r_mtx);
free((caddr_t)rep, M_NFSREQ);
return (error);
}
@ -1153,12 +1225,16 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
m_freem(mrep);
error = 0;
waituntil = time_second + nfs3_jukebox_delay;
while (time_second < waituntil)
(void) tsleep(&lbolt,
PSOCK, "nqnfstry", 0);
while (time_second < waituntil) {
mtx_lock(&Giant);
(void) tsleep(&lbolt, PSOCK, "nqnfstry", 0);
mtx_unlock(&Giant);
}
mtx_lock(&nfs_reqq_mtx);
if (++nfs_xid == 0)
nfs_xid++;
rep->r_xid = *xidp = txdr_unsigned(nfs_xid);
mtx_unlock(&nfs_reqq_mtx);
goto tryagain;
}
@ -1176,6 +1252,7 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
} else
m_freem(mrep);
m_freem(rep->r_mreq);
mtx_destroy(&rep->r_mtx);
free((caddr_t)rep, M_NFSREQ);
return (error);
}
@ -1184,6 +1261,7 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
*mdp = md;
*dposp = dpos;
m_freem(rep->r_mreq);
mtx_destroy(&rep->r_mtx);
FREE((caddr_t)rep, M_NFSREQ);
return (0);
}
@ -1191,6 +1269,7 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
error = EPROTONOSUPPORT;
nfsmout:
m_freem(rep->r_mreq);
mtx_destroy(&rep->r_mtx);
free((caddr_t)rep, M_NFSREQ);
return (error);
}
@ -1215,34 +1294,34 @@ nfs_timer(void *arg)
struct socket *so;
struct nfsmount *nmp;
int timeo;
int s, error;
int error;
struct timeval now;
getmicrouptime(&now);
s = splnet();
mtx_lock(&Giant); /* nfs_down -> tprintf */
mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
nmp = rep->r_nmp;
if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
mtx_lock(&rep->r_mtx);
if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
mtx_unlock(&rep->r_mtx);
continue;
} else
mtx_unlock(&rep->r_mtx);
if (nfs_sigintr(nmp, rep, rep->r_td))
continue;
mtx_lock(&nmp->nm_mtx);
mtx_lock(&rep->r_mtx);
if (nmp->nm_tprintf_initial_delay != 0 &&
(rep->r_rexmit > 2 || (rep->r_flags & R_RESENDERR)) &&
rep->r_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) {
mtx_unlock(&rep->r_mtx);
mtx_unlock(&nmp->nm_mtx);
rep->r_lastmsg = now.tv_sec;
nfs_down(rep, nmp, rep->r_td, "not responding",
0, NFSSTA_TIMEO);
#if 0
if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
/* we're not yet completely mounted and */
/* we can't complete an RPC, so we fail */
nfsstats.rpctimeouts++;
nfs_softterm(rep);
continue;
}
#endif
0, NFSSTA_TIMEO);
mtx_lock(&nmp->nm_mtx);
mtx_lock(&rep->r_mtx);
}
if (rep->r_rtt >= 0) {
rep->r_rtt++;
@ -1252,14 +1331,19 @@ nfs_timer(void *arg)
timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
if (nmp->nm_timeouts > 0)
timeo *= nfs_backoff[nmp->nm_timeouts - 1];
if (rep->r_rtt <= timeo)
if (rep->r_rtt <= timeo) {
mtx_unlock(&rep->r_mtx);
mtx_unlock(&nmp->nm_mtx);
continue;
}
if (nmp->nm_timeouts < NFS_NBACKOFF)
nmp->nm_timeouts++;
}
if (rep->r_rexmit >= rep->r_retry) { /* too many */
nfsstats.rpctimeouts++;
nfs_softterm(rep);
mtx_unlock(&rep->r_mtx);
mtx_unlock(&nmp->nm_mtx);
continue;
}
if (nmp->nm_sotype != SOCK_DGRAM) {
@ -1272,12 +1356,17 @@ nfs_timer(void *arg)
* if necessary.
*/
rep->r_flags |= R_MUSTRESEND;
wakeup_nfsreq(rep);
wakeup((caddr_t)rep);
rep->r_rtt = 0;
mtx_unlock(&rep->r_mtx);
mtx_unlock(&nmp->nm_mtx);
continue;
}
if ((so = nmp->nm_so) == NULL)
if ((so = nmp->nm_so) == NULL) {
mtx_unlock(&rep->r_mtx);
mtx_unlock(&nmp->nm_mtx);
continue;
}
/*
* If there is enough space and the window allows..
* Resend it
@ -1285,57 +1374,69 @@ nfs_timer(void *arg)
*/
rep->r_rtt = -1;
if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
(rep->r_flags & R_SENT) ||
nmp->nm_sent < nmp->nm_cwnd) &&
(m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) {
/*
* Mark the request to indicate that a XMIT is in progress
* to prevent the req structure being removed in nfs_request().
*/
rep->r_flags |= R_REXMIT_INPROG;
mtx_unlock(&nfs_reqq_mtx);
NET_LOCK_GIANT();
if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
error = (*so->so_proto->pr_usrreqs->pru_send)
(so, 0, m, NULL, NULL, curthread);
else
error = (*so->so_proto->pr_usrreqs->pru_send)
(so, 0, m, nmp->nm_nam, NULL, curthread);
NET_UNLOCK_GIANT();
mtx_lock(&nfs_reqq_mtx);
rep->r_flags &= ~R_REXMIT_INPROG;
wakeup((caddr_t)&rep->r_flags);
if (error) {
if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
so->so_error = 0;
rep->r_flags |= R_RESENDERR;
} else {
((nmp->nm_flag & NFSMNT_DUMBTIMR) || (rep->r_flags & R_SENT) ||
nmp->nm_sent < nmp->nm_cwnd)) {
mtx_unlock(&rep->r_mtx);
mtx_unlock(&nmp->nm_mtx);
if ((m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) {
/*
* Iff first send, start timing
* else turn timing off, backoff timer
* and divide congestion window by 2.
* Mark the request to indicate that a XMIT is in
* progress to prevent the req structure being
* removed in nfs_request().
*/
rep->r_flags &= ~R_RESENDERR;
if (rep->r_flags & R_SENT) {
rep->r_flags &= ~R_TIMING;
if (++rep->r_rexmit > NFS_MAXREXMIT)
rep->r_rexmit = NFS_MAXREXMIT;
nmp->nm_cwnd >>= 1;
if (nmp->nm_cwnd < NFS_CWNDSCALE)
nmp->nm_cwnd = NFS_CWNDSCALE;
nfsstats.rpcretries++;
mtx_lock(&rep->r_mtx);
rep->r_flags |= R_REXMIT_INPROG;
mtx_unlock(&rep->r_mtx);
mtx_unlock(&nfs_reqq_mtx);
NET_LOCK_GIANT();
if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
error = (*so->so_proto->pr_usrreqs->pru_send)
(so, 0, m, NULL, NULL, curthread);
else
error = (*so->so_proto->pr_usrreqs->pru_send)
(so, 0, m, nmp->nm_nam, NULL,
curthread);
NET_UNLOCK_GIANT();
mtx_lock(&nfs_reqq_mtx);
mtx_lock(&nmp->nm_mtx);
mtx_lock(&rep->r_mtx);
rep->r_flags &= ~R_REXMIT_INPROG;
wakeup((caddr_t)&rep->r_flags);
if (error) {
if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
so->so_error = 0;
rep->r_flags |= R_RESENDERR;
} else {
rep->r_flags |= R_SENT;
nmp->nm_sent += NFS_CWNDSCALE;
/*
* Iff first send, start timing
* else turn timing off, backoff timer
* and divide congestion window by 2.
*/
rep->r_flags &= ~R_RESENDERR;
if (rep->r_flags & R_SENT) {
rep->r_flags &= ~R_TIMING;
if (++rep->r_rexmit > NFS_MAXREXMIT)
rep->r_rexmit = NFS_MAXREXMIT;
nmp->nm_cwnd >>= 1;
if (nmp->nm_cwnd < NFS_CWNDSCALE)
nmp->nm_cwnd = NFS_CWNDSCALE;
nfsstats.rpcretries++;
} else {
rep->r_flags |= R_SENT;
nmp->nm_sent += NFS_CWNDSCALE;
}
rep->r_rtt = 0;
}
rep->r_rtt = 0;
mtx_unlock(&rep->r_mtx);
mtx_unlock(&nmp->nm_mtx);
}
} else {
mtx_unlock(&rep->r_mtx);
mtx_unlock(&nmp->nm_mtx);
}
}
mtx_unlock(&nfs_reqq_mtx);
mtx_unlock(&Giant); /* nfs_down -> tprintf */
splx(s);
callout_reset(&nfs_callout, nfs_ticks, nfs_timer, NULL);
}
@ -1349,31 +1450,33 @@ nfs_nmcancelreqs(nmp)
struct nfsmount *nmp;
{
struct nfsreq *req;
int i, s;
int i;
s = splnet();
mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
mtx_lock(&req->r_mtx);
if (nmp != req->r_nmp || req->r_mrep != NULL ||
(req->r_flags & R_SOFTTERM))
(req->r_flags & R_SOFTTERM)) {
mtx_unlock(&req->r_mtx);
continue;
}
nfs_softterm(req);
mtx_unlock(&req->r_mtx);
}
mtx_unlock(&nfs_reqq_mtx);
splx(s);
for (i = 0; i < 30; i++) {
s = splnet();
mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
if (nmp == req->r_nmp)
break;
}
mtx_unlock(&nfs_reqq_mtx);
splx(s);
if (req == NULL)
return (0);
mtx_lock(&Giant);
tsleep(&lbolt, PSOCK, "nfscancel", 0);
mtx_unlock(&Giant);
}
return (EBUSY);
}
@ -1387,7 +1490,7 @@ nfs_nmcancelreqs(nmp)
static void
nfs_softterm(struct nfsreq *rep)
{
KASSERT(mtx_owned(&rep->r_mtx), ("NFS req lock not owned !"));
rep->r_flags |= R_SOFTTERM;
if (rep->r_flags & R_SENT) {
rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
@ -1397,7 +1500,7 @@ nfs_softterm(struct nfsreq *rep)
* Request terminated, wakeup the blocked process, so that we
* can return EINTR back.
*/
wakeup_nfsreq(rep);
wakeup((caddr_t)rep);
}
/*
@ -1493,28 +1596,6 @@ nfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *
return (error);
}
/*
* NFS wrapper to tsleep(), that shoves a new p_sigmask and restores the
* old one after tsleep() returns.
*/
int
nfs_tsleep(struct thread *td, void *ident, int priority, char *wmesg, int timo)
{
sigset_t oldset;
int error;
struct proc *p;
if ((priority & PCATCH) == 0)
return tsleep(ident, priority, wmesg, timo);
if (td == NULL)
td = curthread; /* XXX */
nfs_set_sigmask(td, &oldset);
error = tsleep(ident, priority, wmesg, timo);
nfs_restore_sigmask(td, &oldset);
p = td->td_proc;
return (error);
}
/*
* Test for a termination condition pending on the process.
* This is used for NFSMNT_INT mounts.
@ -1524,19 +1605,28 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td)
{
struct proc *p;
sigset_t tmpset;
int error = 0;
if ((nmp->nm_flag & NFSMNT_NFSV4) != 0)
return nfs4_sigintr(nmp, rep, td);
if (rep && (rep->r_flags & R_SOFTTERM))
return (EIO);
if (rep) {
mtx_lock(&rep->r_mtx);
if (rep->r_flags & R_SOFTTERM) {
mtx_unlock(&rep->r_mtx);
error = EIO;
goto out;
} else
mtx_unlock(&rep->r_mtx);
}
/* Terminate all requests while attempting a forced unmount. */
if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)
return (EIO);
if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) {
error = EIO;
goto out;
}
if (!(nmp->nm_flag & NFSMNT_INT))
return (0);
goto out;
if (td == NULL)
return (0);
p = td->td_proc;
PROC_LOCK(p);
tmpset = p->p_siglist;
@ -1551,6 +1641,8 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td)
PROC_UNLOCK(p);
return (0);
out:
return(error);
}
/*
@ -1567,21 +1659,25 @@ nfs_sndlock(struct nfsreq *rep)
int error, slpflag = 0, slptimeo = 0;
td = rep->r_td;
mtx_lock(&rep->r_nmp->nm_mtx);
if (rep->r_nmp->nm_flag & NFSMNT_INT)
slpflag = PCATCH;
while (*statep & NFSSTA_SNDLOCK) {
error = nfs_sigintr(rep->r_nmp, rep, td);
if (error)
if (error) {
mtx_unlock(&rep->r_nmp->nm_mtx);
return (error);
}
*statep |= NFSSTA_WANTSND;
(void) tsleep(statep, slpflag | (PZERO - 1),
"nfsndlck", slptimeo);
(void) msleep(statep, &rep->r_nmp->nm_mtx,
slpflag | (PZERO - 1), "nfsndlck", slptimeo);
if (slpflag == PCATCH) {
slpflag = 0;
slptimeo = 2 * hz;
}
}
*statep |= NFSSTA_SNDLOCK;
mtx_unlock(&rep->r_nmp->nm_mtx);
return (0);
}
@ -1593,6 +1689,7 @@ nfs_sndunlock(struct nfsreq *rep)
{
int *statep = &rep->r_nmp->nm_state;
mtx_lock(&rep->r_nmp->nm_mtx);
if ((*statep & NFSSTA_SNDLOCK) == 0)
panic("nfs sndunlock");
*statep &= ~NFSSTA_SNDLOCK;
@ -1600,6 +1697,7 @@ nfs_sndunlock(struct nfsreq *rep)
*statep &= ~NFSSTA_WANTSND;
wakeup(statep);
}
mtx_unlock(&rep->r_nmp->nm_mtx);
}
/*
@ -1703,8 +1801,10 @@ nfs_down(rep, nmp, td, msg, error, flags)
nmp->nm_state |= NFSSTA_LOCKTIMEO;
}
#endif
if (rep != NULL) {
mtx_lock(&rep->r_mtx);
rep->r_flags |= R_TPRINTFMSG;
mtx_unlock(&rep->r_mtx);
}
nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
}
@ -1721,8 +1821,10 @@ nfs_up(rep, nmp, td, msg, flags)
if (nmp == NULL)
return;
if (rep != NULL)
mtx_lock(&rep->r_mtx);
if ((rep == NULL) || (rep->r_flags & R_TPRINTFMSG) != 0)
nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
if (rep != NULL)
mtx_unlock(&rep->r_mtx);
if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
nmp->nm_state &= ~NFSSTA_TIMEO;
vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
@ -1736,4 +1838,3 @@ nfs_up(rep, nmp, td, msg, flags)
}
#endif
}
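
One lock-ordering note falls out of the hunks above: wherever nfs_timer() and nfs_clnt_match_xid() need several of these locks at once, they take them in the order nfs_reqq_mtx, then nm_mtx, then r_mtx, which appears to be the intended hierarchy. Schematically:

mtx_lock(&nfs_reqq_mtx);		/* outermost: the request queue */
TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
	mtx_lock(&rep->r_nmp->nm_mtx);	/* then the mount ... */
	mtx_lock(&rep->r_mtx);		/* ... then the request */
	/* examine or update rep under both locks */
	mtx_unlock(&rep->r_mtx);
	mtx_unlock(&rep->r_nmp->nm_mtx);
}
mtx_unlock(&nfs_reqq_mtx);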

View File

@ -75,6 +75,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
/*
* Note that stdarg.h and the ANSI style va_start macro are used for both
* ANSI and traditional C compilers.
*/
#include <machine/stdarg.h>
/*
* Data items converted to xdr at startup, since they are constant
* This is kinda hokey, but may save a little time doing byte swaps
@ -95,7 +101,6 @@ int nfs_pbuf_freecnt = -1; /* start out unlimited */
struct nfs_reqq nfs_reqq;
struct mtx nfs_reqq_mtx;
struct mtx nfs_reply_mtx;
struct nfs_bufq nfs_bufq;
/*
@ -182,6 +187,7 @@ nfsm_rpchead(struct ucred *cr, int nmflag, int procid, int auth_type,
*/
tl = nfsm_build(u_int32_t *, 8 * NFSX_UNSIGNED);
mtx_lock(&nfs_reqq_mtx);
/* Get a pretty random xid to start with */
if (!nfs_xid)
nfs_xid = random();
@ -193,6 +199,7 @@ nfsm_rpchead(struct ucred *cr, int nmflag, int procid, int auth_type,
*xidpp = tl;
*tl++ = txdr_unsigned(nfs_xid);
mtx_unlock(&nfs_reqq_mtx);
*tl++ = rpc_call;
*tl++ = rpc_vers;
*tl++ = txdr_unsigned(NFS_PROG);
@ -416,7 +423,7 @@ nfs_init(struct vfsconf *vfsp)
TAILQ_INIT(&nfs_reqq);
callout_init(&nfs_callout, CALLOUT_MPSAFE);
mtx_init(&nfs_reqq_mtx, "NFS reqq lock", NULL, MTX_DEF);
mtx_init(&nfs_reply_mtx, "Synch NFS reply posting", NULL, MTX_DEF);
mtx_init(&nfs_iod_mtx, "NFS iod lock", NULL, MTX_DEF);
nfs_pbuf_freecnt = nswbuf / 2 + 1;
@ -437,19 +444,80 @@ nfs_uninit(struct vfsconf *vfsp)
* Tell all nfsiod processes to exit. Clear nfs_iodmax, and wakeup
* any sleeping nfsiods so they check nfs_iodmax and exit.
*/
mtx_lock(&nfs_iod_mtx);
nfs_iodmax = 0;
for (i = 0; i < nfs_numasync; i++)
if (nfs_iodwant[i])
wakeup(&nfs_iodwant[i]);
/* The last nfsiod to exit will wake us up when nfs_numasync hits 0 */
while (nfs_numasync)
tsleep(&nfs_numasync, PWAIT, "ioddie", 0);
msleep(&nfs_numasync, &nfs_iod_mtx, PWAIT, "ioddie", 0);
mtx_unlock(&nfs_iod_mtx);
nfs_nhuninit();
uma_zdestroy(nfsmount_zone);
return (0);
}
void
nfs_dircookie_lock(struct nfsnode *np)
{
mtx_lock(&np->n_mtx);
while (np->n_flag & NDIRCOOKIELK)
(void) msleep(&np->n_flag, &np->n_mtx, PZERO, "nfsdirlk", 0);
np->n_flag |= NDIRCOOKIELK;
mtx_unlock(&np->n_mtx);
}
void
nfs_dircookie_unlock(struct nfsnode *np)
{
mtx_lock(&np->n_mtx);
np->n_flag &= ~NDIRCOOKIELK;
wakeup(&np->n_flag);
mtx_unlock(&np->n_mtx);
}
int
nfs_upgrade_vnlock(struct vnode *vp, struct thread *td)
{
int old_lock;
if ((old_lock = VOP_ISLOCKED(vp, td)) != LK_EXCLUSIVE) {
if (old_lock == LK_SHARED) {
/* Upgrade to exclusive lock, this might block */
vn_lock(vp, LK_UPGRADE | LK_RETRY, td);
} else {
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
}
}
return old_lock;
}
void
nfs_downgrade_vnlock(struct vnode *vp, struct thread *td, int old_lock)
{
if (old_lock != LK_EXCLUSIVE) {
if (old_lock == LK_SHARED) {
/* Downgrade from exclusive lock, this might block */
vn_lock(vp, LK_DOWNGRADE, td);
} else {
VOP_UNLOCK(vp, 0, td);
}
}
}
void
nfs_printf(const char *fmt, ...)
{
va_list ap;
mtx_lock(&Giant);
va_start(ap, fmt);
vprintf(fmt, ap);	/* forward the va_list; printf(fmt, ap) would not expand the arguments */
va_end(ap);
mtx_unlock(&Giant);
}
/*
* Attribute cache routines.
* nfs_loadattrcache() - loads or updates the cache contents from attributes
@ -466,7 +534,7 @@ nfs_uninit(struct vfsconf *vfsp)
*/
int
nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp,
struct vattr *vaper, int dontshrink)
struct vattr *vaper, int dontshrink)
{
struct vnode *vp = *vpp;
struct vattr *vap;
@ -535,6 +603,7 @@ nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp,
* information.
*/
np = VTONFS(vp);
mtx_lock(&np->n_mtx);
if (vp->v_type != vtyp) {
vp->v_type = vtyp;
if (vp->v_type == VFIFO)
@ -617,6 +686,7 @@ nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp,
vaper->va_mtime = np->n_mtim;
}
}
mtx_unlock(&np->n_mtx);
return (0);
}
@ -639,16 +709,20 @@ nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
struct vattr *vap;
struct nfsmount *nmp;
int timeo;
np = VTONFS(vp);
vap = &np->n_vattr;
nmp = VFSTONFS(vp->v_mount);
#ifdef NFS_ACDEBUG
mtx_lock(&Giant); /* nfs_printf() */
#endif
mtx_lock(&np->n_mtx);
/* XXX n_mtime doesn't seem to be updated on a miss-and-reload */
timeo = (time_second - np->n_mtime.tv_sec) / 10;
#ifdef NFS_ACDEBUG
if (nfs_acdebug>1)
printf("nfs_getattrcache: initial timeo = %d\n", timeo);
nfs_printf("nfs_getattrcache: initial timeo = %d\n", timeo);
#endif
if (vap->va_type == VDIR) {
@ -665,18 +739,19 @@ nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
#ifdef NFS_ACDEBUG
if (nfs_acdebug > 2)
printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
nmp->nm_acregmin, nmp->nm_acregmax,
nmp->nm_acdirmin, nmp->nm_acdirmax);
nfs_printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
nmp->nm_acregmin, nmp->nm_acregmax,
nmp->nm_acdirmin, nmp->nm_acdirmax);
if (nfs_acdebug)
printf("nfs_getattrcache: age = %d; final timeo = %d\n",
(time_second - np->n_attrstamp), timeo);
nfs_printf("nfs_getattrcache: age = %d; final timeo = %d\n",
(time_second - np->n_attrstamp), timeo);
#endif
if ((time_second - np->n_attrstamp) >= timeo) {
nfsstats.attrcache_misses++;
return (ENOENT);
mtx_unlock(&np->n_mtx);
return (ENOENT);
}
nfsstats.attrcache_hits++;
if (vap->va_size != np->n_size) {
@ -701,6 +776,10 @@ nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
if (np->n_flag & NUPD)
vaper->va_mtime = np->n_mtim;
}
mtx_unlock(&np->n_mtx);
#ifdef NFS_ACDEBUG
mtx_unlock(&Giant); /* nfs_printf() */
#endif
return (0);
}
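As a worked example of the adaptive timeout: a regular file last modified 300 seconds ago starts with timeo = 300 / 10 = 30; with the usual defaults of acregmin 3 and acregmax 60 that value stands, so cached attributes younger than 30 seconds satisfy the getattr without an RPC. A file modified 10 seconds ago yields timeo = 1, which the clamp raises to acregmin.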
@ -714,7 +793,8 @@ nfs_getcookie(struct nfsnode *np, off_t off, int add)
{
struct nfsdmap *dp, *dp2;
int pos;
nfsuint64 *retval = NULL;
pos = (uoff_t)off / NFS_DIRBLKSIZ;
if (pos == 0 || off < 0) {
#ifdef DIAGNOSTIC
@ -732,14 +812,14 @@ nfs_getcookie(struct nfsnode *np, off_t off, int add)
dp->ndm_eocookie = 0;
LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list);
} else
return (NULL);
goto out;
}
while (pos >= NFSNUMCOOKIES) {
pos -= NFSNUMCOOKIES;
if (LIST_NEXT(dp, ndm_list)) {
if (!add && dp->ndm_eocookie < NFSNUMCOOKIES &&
pos >= dp->ndm_eocookie)
return (NULL);
pos >= dp->ndm_eocookie)
goto out;
dp = LIST_NEXT(dp, ndm_list);
} else if (add) {
MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap),
@ -748,15 +828,17 @@ nfs_getcookie(struct nfsnode *np, off_t off, int add)
LIST_INSERT_AFTER(dp, dp2, ndm_list);
dp = dp2;
} else
return (NULL);
goto out;
}
if (pos >= dp->ndm_eocookie) {
if (add)
dp->ndm_eocookie = pos + 1;
else
return (NULL);
goto out;
}
return (&dp->ndm_cookies[pos]);
retval = &dp->ndm_cookies[pos];
out:
return (retval);
}
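For example, with NFSNUMCOOKIES slots per nfsdmap block, an offset of (NFSNUMCOOKIES + 2) * NFS_DIRBLKSIZ yields an initial pos of NFSNUMCOOKIES + 2; one pass through the while loop advances to the second block with pos = 2, so the cookie lives in slot 2 of that block. A lookup (add == 0) that runs past ndm_eocookie returns NULL, which the readdir code treats as a stale directory (NFSERR_BAD_COOKIE).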
/*
@ -773,11 +855,13 @@ nfs_invaldir(struct vnode *vp)
if (vp->v_type != VDIR)
panic("nfs: invaldir not dir");
#endif
nfs_dircookie_lock(np);
np->n_direofoffset = 0;
np->n_cookieverf.nfsuquad[0] = 0;
np->n_cookieverf.nfsuquad[1] = 0;
if (LIST_FIRST(&np->n_cookies))
LIST_FIRST(&np->n_cookies)->ndm_eocookie = 0;
nfs_dircookie_unlock(np);
}
/*
@ -797,8 +881,6 @@ nfs_clearcommit(struct mount *mp)
struct buf *bp, *nbp;
int s;
GIANT_REQUIRED;
s = splbio();
MNT_ILOCK(mp);
MNT_VNODE_FOREACH(vp, mp, nvp) {
@ -896,7 +978,7 @@ nfsm_getfh_xx(nfsfh_t **f, int *s, int v3, struct mbuf **md, caddr_t *dpos)
int
nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md,
caddr_t *dpos)
caddr_t *dpos)
{
int t1;
@ -910,7 +992,7 @@ nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md,
int
nfsm_postop_attr_xx(struct vnode **v, int *f, struct mbuf **md,
caddr_t *dpos)
caddr_t *dpos)
{
u_int32_t *tl;
int t1;
@ -945,9 +1027,11 @@ nfsm_wcc_data_xx(struct vnode **v, int *f, struct mbuf **md, caddr_t *dpos)
tl = nfsm_dissect_xx(6 * NFSX_UNSIGNED, md, dpos);
if (tl == NULL)
return EBADRPC;
mtx_lock(&(VTONFS(*v))->n_mtx);
if (*f)
ttretf = (VTONFS(*v)->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) &&
VTONFS(*v)->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3)));
mtx_unlock(&(VTONFS(*v))->n_mtx);
}
t1 = nfsm_postop_attr_xx(v, &ttattrf, md, dpos);
if (t1)

View File

@ -35,6 +35,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bootp.h"
#include "opt_nfsroot.h"
@ -84,6 +85,7 @@ MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write st
uma_zone_t nfsmount_zone;
struct nfsstats nfsstats;
SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RD,
&nfsstats, nfsstats, "S,nfsstats");
@ -183,7 +185,8 @@ nfs_iosize(struct nfsmount *nmp)
* space.
*/
iosize = max(nmp->nm_rsize, nmp->nm_wsize);
if (iosize < PAGE_SIZE) iosize = PAGE_SIZE;
if (iosize < PAGE_SIZE)
iosize = PAGE_SIZE;
return iosize;
}
@ -257,8 +260,12 @@ nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
return (error);
}
vp = NFSTOV(np);
if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
mtx_lock(&nmp->nm_mtx);
if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
mtx_unlock(&nmp->nm_mtx);
(void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
} else
mtx_unlock(&nmp->nm_mtx);
nfsstats.rpccnt[NFSPROC_FSSTAT]++;
mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
mb = mreq;
@ -273,7 +280,9 @@ nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
goto nfsmout;
}
sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
mtx_lock(&nmp->nm_mtx);
sbp->f_iosize = nfs_iosize(nmp);
mtx_unlock(&nmp->nm_mtx);
if (v3) {
sbp->f_bsize = NFS_FABLKSIZE;
tquad = fxdr_hyper(&sfp->sf_tbytes);
@ -314,7 +323,7 @@ nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
int error = 0, retattr;
struct mbuf *mreq, *mrep, *md, *mb;
u_int64_t maxfsize;
nfsstats.rpccnt[NFSPROC_FSINFO]++;
mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
mb = mreq;
@ -323,6 +332,7 @@ nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
nfsm_request(vp, NFSPROC_FSINFO, td, cred);
nfsm_postop_attr(vp, retattr);
if (!error) {
mtx_lock(&nmp->nm_mtx);
fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
@ -358,6 +368,7 @@ nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
nmp->nm_maxfilesize = maxfsize;
nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
nmp->nm_state |= NFSSTA_GOTFSINFO;
mtx_unlock(&nmp->nm_mtx);
}
m_freem(mrep);
nfsmout:
@ -664,8 +675,7 @@ nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp)
if (nmp->nm_sotype == SOCK_DGRAM)
while (nfs_connect(nmp, NULL)) {
printf("nfs_args: retrying connect\n");
(void) tsleep((caddr_t)&lbolt,
PSOCK, "nfscon", 0);
(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
}
}
}
@ -693,24 +703,31 @@ nfs_mount(struct mount *mp, struct thread *td)
size_t len;
u_char nfh[NFSX_V3FHMAX];
if (vfs_filteropt(mp->mnt_optnew, nfs_opts))
return (EINVAL);
if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
error = EINVAL;
goto out;
}
if (mp->mnt_flag & MNT_ROOTFS)
return (nfs_mountroot(mp, td));
if (mp->mnt_flag & MNT_ROOTFS) {
error = nfs_mountroot(mp, td);
goto out;
}
error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args);
if (error)
return (error);
goto out;
if (args.version != NFS_ARGSVERSION) {
return (EPROGMISMATCH);
error = EPROGMISMATCH;
goto out;
}
if (mp->mnt_flag & MNT_UPDATE) {
struct nfsmount *nmp = VFSTONFS(mp);
if (nmp == NULL)
return (EIO);
if (nmp == NULL) {
error = EIO;
goto out;
}
/*
* When doing an update, we can't change from or to
* v3, switch lockd strategies or change cookie translation
@ -720,7 +737,7 @@ nfs_mount(struct mount *mp, struct thread *td)
(nmp->nm_flag &
(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
nfs_decode_args(mp, nmp, &args);
return (0);
goto out;
}
/*
@ -734,21 +751,25 @@ nfs_mount(struct mount *mp, struct thread *td)
*/
if (nfs_ip_paranoia == 0)
args.flags |= NFSMNT_NOCONN;
if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX)
return (EINVAL);
if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
error = EINVAL;
goto out;
}
error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize);
if (error)
return (error);
goto out;
error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
if (error)
return (error);
goto out;
bzero(&hst[len], MNAMELEN - len);
/* sockargs() call must be after above copyin() calls */
error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen);
if (error)
return (error);
goto out;
args.fh = nfh;
error = mountnfs(&args, mp, nam, hst, &vp, td->td_ucred);
mp->mnt_kern_flag |= MNTK_MPSAFE;
out:
return (error);
}
@ -771,12 +792,11 @@ nfs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
error = copyin(data, &args, sizeof (struct nfs_args));
if (error)
return (error);
return error;
ma = mount_arg(ma, "nfs_args", &args, sizeof args);
error = kernel_mount(ma, flags);
return (error);
}
@ -805,6 +825,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
}
vfs_getnewfsid(mp);
nmp->nm_mountp = mp;
mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
/*
* V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
@ -851,10 +872,6 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
nfs_decode_args(mp, nmp, argp);
if (nmp->nm_sotype == SOCK_STREAM)
mtx_init(&nmp->nm_nfstcpstate.mtx, "NFS/TCP state lock",
NULL, MTX_DEF);
/*
* For Connection based sockets (TCP,...) defer the connect until
* the first request, in case the server is not responding.
@ -869,7 +886,9 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
* stuck on a dead server and we are holding a lock on the mount
* point.
*/
mtx_lock(&nmp->nm_mtx);
mp->mnt_stat.f_iosize = nfs_iosize(nmp);
mtx_unlock(&nmp->nm_mtx);
/*
* A reference count is needed on the nfsnode representing the
* remote root. If this object is not persistent, then backward
@ -900,8 +919,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
return (0);
bad:
if (nmp->nm_sotype == SOCK_STREAM)
mtx_destroy(&nmp->nm_nfstcpstate.mtx);
mtx_destroy(&nmp->nm_mtx);
nfs_disconnect(nmp);
uma_zfree(nfsmount_zone, nmp);
FREE(nam, M_SONAME);
@ -930,12 +948,12 @@ nfs_unmount(struct mount *mp, int mntflags, struct thread *td)
if (flags & FORCECLOSE) {
error = nfs_nmcancelreqs(nmp);
if (error)
return (error);
goto out;
}
/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
error = vflush(mp, 1, flags, td);
if (error)
return (error);
goto out;
/*
* We are now committed to the unmount.
@ -943,11 +961,10 @@ nfs_unmount(struct mount *mp, int mntflags, struct thread *td)
nfs_disconnect(nmp);
FREE(nmp->nm_nam, M_SONAME);
if (nmp->nm_sotype == SOCK_STREAM)
mtx_destroy(&nmp->nm_nfstcpstate.mtx);
mtx_destroy(&nmp->nm_mtx);
uma_zfree(nfsmount_zone, nmp);
return (0);
out:
return (error);
}
/*
@ -964,15 +981,18 @@ nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
nmp = VFSTONFS(mp);
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
if (error)
return (error);
return error;
vp = NFSTOV(np);
/*
* Get transfer parameters and attributes for root vnode once.
*/
mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
(nmp->nm_flag & NFSMNT_NFSV3)) {
mtx_unlock(&nmp->nm_mtx);
nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
}
} else
mtx_unlock(&nmp->nm_mtx);
if (vp->v_type == VNON)
vp->v_type = VDIR;
vp->v_vflag |= VV_ROOT;
@ -1051,8 +1071,10 @@ nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
break;
#endif
case VFS_CTL_QUERY:
mtx_lock(&nmp->nm_mtx);
if (nmp->nm_state & NFSSTA_TIMEO)
vq.vq_flags |= VQ_NOTRESP;
mtx_unlock(&nmp->nm_mtx);
#if 0
if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
(nmp->nm_state & NFSSTA_LOCKTIMEO))

View File

@ -192,6 +192,7 @@ static int nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
/*
* Global variables
*/
struct mtx nfs_iod_mtx;
struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
int nfs_numasync = 0;
@ -241,6 +242,23 @@ SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
#define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \
| NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \
| NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
/*
* SMP Locking Note :
* The list of locks after the description of the lock is the ordering
* of other locks acquired with the lock held.
* np->n_mtx : Protects the fields in the nfsnode.
VM Object Lock
VI_MTX (acquired indirectly)
* nmp->nm_mtx : Protects the fields in the nfsmount.
rep->r_mtx
* nfs_iod_mtx : Global lock, protects shared nfsiod state.
* nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
nmp->nm_mtx
rep->r_mtx
* rep->r_mtx : Protects the fields in an nfsreq.
*/
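Read each indented name as a lock that may be acquired while the lock described above it is held. An illustrative nesting for the request-queue path, following the documented order (a sketch only, not a code path from this change):

	/*
	 * Legal nesting per the ordering above:
	 * nfs_reqq_mtx -> nmp->nm_mtx -> rep->r_mtx.
	 * Taking these in any other order risks deadlock.
	 */
	mtx_lock(&nfs_reqq_mtx);
	mtx_lock(&nmp->nm_mtx);
	mtx_lock(&rep->r_mtx);
	/* ... examine or retransmit the request ... */
	mtx_unlock(&rep->r_mtx);
	mtx_unlock(&nmp->nm_mtx);
	mtx_unlock(&nfs_reqq_mtx);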
static int
nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
struct ucred *cred)
@ -266,9 +284,11 @@ nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
if (!error) {
tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
rmode = fxdr_unsigned(u_int32_t, *tl);
mtx_lock(&np->n_mtx);
np->n_mode = rmode;
np->n_modeuid = cred->cr_uid;
np->n_modestamp = time_second;
mtx_unlock(&np->n_mtx);
}
m_freem(mrep);
nfsmout:
@ -343,6 +363,7 @@ nfs_access(struct vop_access_args *ap)
* Does our cached result allow us to give a definite yes to
* this request?
*/
mtx_lock(&np->n_mtx);
if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
(ap->a_cred->cr_uid == np->n_modeuid) &&
((np->n_mode & mode) == mode)) {
@ -352,18 +373,21 @@ nfs_access(struct vop_access_args *ap)
* Either a no, or a don't know. Go to the wire.
*/
nfsstats.accesscache_misses++;
mtx_unlock(&np->n_mtx);
error = nfs3_access_otw(vp, wmode, ap->a_td, ap->a_cred);
mtx_lock(&np->n_mtx);
if (!error) {
if ((np->n_mode & mode) != mode) {
error = EACCES;
}
}
}
mtx_unlock(&np->n_mtx);
return (error);
} else {
if ((error = nfsspec_access(ap)) != 0)
if ((error = nfsspec_access(ap)) != 0) {
return (error);
}
/*
* Attempt to prevent a mapped root from accessing a file
* which it shouldn't. We try to read a byte from the file
@ -371,12 +395,14 @@ nfs_access(struct vop_access_args *ap)
* After calling nfsspec_access, we should have the correct
* file size cached.
*/
mtx_lock(&np->n_mtx);
if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
&& VTONFS(vp)->n_size > 0) {
struct iovec aiov;
struct uio auio;
char buf[1];
mtx_unlock(&np->n_mtx);
aiov.iov_base = buf;
aiov.iov_len = 1;
auio.uio_iov = &aiov;
@ -400,7 +426,8 @@ nfs_access(struct vop_access_args *ap)
error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
else
error = EACCES;
}
} else
mtx_unlock(&np->n_mtx);
return (error);
}
}
@ -428,7 +455,9 @@ nfs_open(struct vop_open_args *ap)
/*
* Get a valid lease. If cached data is stale, flush it.
*/
mtx_lock(&np->n_mtx);
if (np->n_flag & NMODIFIED) {
mtx_unlock(&np->n_mtx);
error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error == EINTR || error == EIO)
return (error);
@ -438,20 +467,28 @@ nfs_open(struct vop_open_args *ap)
error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);
if (error)
return (error);
mtx_lock(&np->n_mtx);
np->n_mtime = vattr.va_mtime;
mtx_unlock(&np->n_mtx);
} else {
np->n_attrstamp = 0;
mtx_unlock(&np->n_mtx);
error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);
if (error)
return (error);
mtx_lock(&np->n_mtx);
if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
if (vp->v_type == VDIR)
np->n_direofoffset = 0;
mtx_unlock(&np->n_mtx);
error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error == EINTR || error == EIO)
if (error == EINTR || error == EIO) {
return (error);
}
mtx_lock(&np->n_mtx);
np->n_mtime = vattr.va_mtime;
}
mtx_unlock(&np->n_mtx);
}
/*
* If the object has >= 1 O_DIRECT active opens, we disable caching.
@ -461,11 +498,12 @@ nfs_open(struct vop_open_args *ap)
error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error)
return (error);
mtx_lock(&np->n_mtx);
np->n_flag |= NNONCACHE;
mtx_unlock(&np->n_mtx);
}
np->n_directio_opens++;
}
np->ra_expect_lbn = 0;
vnode_create_vobject(vp, vattr.va_size, ap->a_td);
return (0);
}
@ -519,7 +557,9 @@ nfs_close(struct vop_close_args *ap)
vm_object_page_clean(vp->v_object, 0, 0, 0);
VM_OBJECT_UNLOCK(vp->v_object);
}
mtx_lock(&np->n_mtx);
if (np->n_flag & NMODIFIED) {
mtx_unlock(&np->n_mtx);
if (NFS_ISV3(vp)) {
/*
* Under NFSv3 we have dirty buffers to dispose of. We
@ -539,6 +579,7 @@ nfs_close(struct vop_close_args *ap)
/* np->n_flag &= ~NMODIFIED; */
} else
error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
mtx_lock(&np->n_mtx);
}
/*
* Invalidate the attribute cache in all cases.
@ -551,13 +592,16 @@ nfs_close(struct vop_close_args *ap)
np->n_flag &= ~NWRITEERR;
error = np->n_error;
}
mtx_unlock(&np->n_mtx);
}
if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
mtx_lock(&np->n_mtx);
KASSERT((np->n_directio_opens > 0),
("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
np->n_directio_opens--;
if (np->n_directio_opens == 0)
np->n_flag &= ~NNONCACHE;
mtx_unlock(&np->n_mtx);
}
return (error);
}
@ -578,21 +622,21 @@ nfs_getattr(struct vop_getattr_args *ap)
/*
* Update local times for special files.
*/
mtx_lock(&np->n_mtx);
if (np->n_flag & (NACC | NUPD))
np->n_flag |= NCHG;
mtx_unlock(&np->n_mtx);
/*
* First look in the cache.
*/
if (nfs_getattrcache(vp, ap->a_vap) == 0)
return (0);
goto nfsmout;
if (v3 && nfsaccess_cache_timeout > 0) {
nfsstats.accesscache_misses++;
nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_td, ap->a_cred);
if (nfs_getattrcache(vp, ap->a_vap) == 0)
return (0);
goto nfsmout;
}
nfsstats.rpccnt[NFSPROC_GETATTR]++;
mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
mb = mreq;
@ -635,8 +679,10 @@ nfs_setattr(struct vop_setattr_args *ap)
if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
(vp->v_mount->mnt_flag & MNT_RDONLY))
return (EROFS);
(vp->v_mount->mnt_flag & MNT_RDONLY)) {
error = EROFS;
goto out;
}
if (vap->va_size != VNOVAL) {
switch (vp->v_type) {
case VDIR:
@ -650,7 +696,7 @@ nfs_setattr(struct vop_setattr_args *ap)
vap->va_mode == (mode_t)VNOVAL &&
vap->va_uid == (uid_t)VNOVAL &&
vap->va_gid == (gid_t)VNOVAL)
return (0);
return (0);
vap->va_size = VNOVAL;
break;
default:
@ -660,47 +706,60 @@ nfs_setattr(struct vop_setattr_args *ap)
*/
if (vp->v_mount->mnt_flag & MNT_RDONLY)
return (EROFS);
/*
* We run vnode_pager_setsize() early (why?),
* we must set np->n_size now to avoid vinvalbuf
* V_SAVE races that might setsize a lower
* value.
*/
mtx_lock(&np->n_mtx);
tsize = np->n_size;
mtx_unlock(&np->n_mtx);
error = nfs_meta_setsize(vp, ap->a_cred,
ap->a_td, vap->va_size);
ap->a_td, vap->va_size);
mtx_lock(&np->n_mtx);
if (np->n_flag & NMODIFIED) {
tsize = np->n_size;
mtx_unlock(&np->n_mtx);
if (vap->va_size == 0)
error = nfs_vinvalbuf(vp, 0, ap->a_td, 1);
else
error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error) {
vnode_pager_setsize(vp, np->n_size);
return (error);
vnode_pager_setsize(vp, tsize);
goto out;
}
}
} else
mtx_unlock(&np->n_mtx);
/*
* np->n_size has already been set to vap->va_size
* in nfs_meta_setsize(). We must set it again since
* nfs_loadattrcache() could be called through
* nfs_meta_setsize() and could modify np->n_size.
*/
mtx_lock(&np->n_mtx);
np->n_vattr.va_size = np->n_size = vap->va_size;
mtx_unlock(&np->n_mtx);
}
} else if ((vap->va_mtime.tv_sec != VNOVAL ||
vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) &&
vp->v_type == VREG &&
(error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1)) != 0 &&
(error == EINTR || error == EIO))
return (error);
} else {
mtx_lock(&np->n_mtx);
if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) &&
(np->n_flag & NMODIFIED) && vp->v_type == VREG) {
mtx_unlock(&np->n_mtx);
if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1)) != 0 &&
(error == EINTR || error == EIO))
return error;
} else
mtx_unlock(&np->n_mtx);
}
error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_td);
if (error && vap->va_size != VNOVAL) {
mtx_lock(&np->n_mtx);
np->n_size = np->n_vattr.va_size = tsize;
vnode_pager_setsize(vp, np->n_size);
vnode_pager_setsize(vp, tsize);
mtx_unlock(&np->n_mtx);
}
out:
return (error);
}
@ -779,7 +838,7 @@ nfs_lookup(struct vop_lookup_args *ap)
int error = 0, attrflag, fhsize;
int v3 = NFS_ISV3(dvp);
struct thread *td = cnp->cn_thread;
*vpp = NULLVP;
if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
@ -964,8 +1023,10 @@ nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
nfsm_strsiz(len, NFS_MAXPATHLEN);
if (len == NFS_MAXPATHLEN) {
struct nfsnode *np = VTONFS(vp);
mtx_lock(&np->n_mtx);
if (np->n_size && np->n_size < NFS_MAXPATHLEN)
len = np->n_size;
mtx_unlock(&np->n_mtx);
}
nfsm_mtouio(uiop, len);
}
@ -987,17 +1048,23 @@ nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
struct nfsmount *nmp;
int error = 0, len, retlen, tsiz, eof, attrflag;
int v3 = NFS_ISV3(vp);
int rsize;
#ifndef nolint
eof = 0;
#endif
nmp = VFSTONFS(vp->v_mount);
tsiz = uiop->uio_resid;
if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
mtx_lock(&nmp->nm_mtx);
if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
mtx_unlock(&nmp->nm_mtx);
return (EFBIG);
}
rsize = nmp->nm_rsize;
mtx_unlock(&nmp->nm_mtx);
while (tsiz > 0) {
nfsstats.rpccnt[NFSPROC_READ]++;
len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
len = (tsiz > rsize) ? rsize : tsiz;
mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
mb = mreq;
bpos = mtod(mb, caddr_t);
@ -1020,9 +1087,10 @@ nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
}
tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
eof = fxdr_unsigned(int, *(tl + 1));
} else
} else {
nfsm_loadattr(vp, NULL);
nfsm_strsiz(retlen, nmp->nm_rsize);
}
nfsm_strsiz(retlen, rsize);
nfsm_mtouio(uiop, retlen);
m_freem(mrep);
tsiz -= retlen;
@ -1043,7 +1111,7 @@ nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
*/
int
nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
int *iomode, int *must_commit)
int *iomode, int *must_commit)
{
u_int32_t *tl;
int32_t backup;
@ -1052,18 +1120,24 @@ nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
int wsize;
#ifndef DIAGNOSTIC
if (uiop->uio_iovcnt != 1)
panic("nfs: writerpc iovcnt > 1");
#endif
*must_commit = 0;
tsiz = uiop->uio_resid;
if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
mtx_lock(&nmp->nm_mtx);
if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
mtx_unlock(&nmp->nm_mtx);
return (EFBIG);
}
wsize = nmp->nm_wsize;
mtx_unlock(&nmp->nm_mtx);
while (tsiz > 0) {
nfsstats.rpccnt[NFSPROC_WRITE]++;
len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
len = (tsiz > wsize) ? wsize : tsiz;
mreq = nfsm_reqhead(vp, NFSPROC_WRITE,
NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
mb = mreq;
@ -1122,6 +1196,7 @@ nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
else if (committed == NFSV3WRITE_DATASYNC &&
commit == NFSV3WRITE_UNSTABLE)
committed = commit;
mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
NFSX_V3WRITEVERF);
@ -1132,11 +1207,16 @@ nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
NFSX_V3WRITEVERF);
}
mtx_unlock(&nmp->nm_mtx);
}
} else
nfsm_loadattr(vp, NULL);
if (wccflag)
VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
} else {
nfsm_loadattr(vp, NULL);
}
if (wccflag) {
mtx_lock(&(VTONFS(vp))->n_mtx);
VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
mtx_unlock(&(VTONFS(vp))->n_mtx);
}
m_freem(mrep);
if (error)
break;
@ -1232,9 +1312,11 @@ nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
cache_enter(dvp, newvp, cnp);
*vpp = newvp;
}
mtx_lock(&(VTONFS(dvp))->n_mtx);
VTONFS(dvp)->n_flag |= NMODIFIED;
if (!wccflag)
VTONFS(dvp)->n_attrstamp = 0;
mtx_unlock(&(VTONFS(dvp))->n_mtx);
return (error);
}
@ -1246,7 +1328,6 @@ nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
static int
nfs_mknod(struct vop_mknod_args *ap)
{
return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
}
@ -1359,9 +1440,11 @@ nfs_create(struct vop_create_args *ap)
cache_enter(dvp, newvp, cnp);
*ap->a_vpp = newvp;
}
mtx_lock(&(VTONFS(dvp))->n_mtx);
VTONFS(dvp)->n_flag |= NMODIFIED;
if (!wccflag)
VTONFS(dvp)->n_attrstamp = 0;
mtx_unlock(&(VTONFS(dvp))->n_mtx);
return (error);
}
@ -1434,7 +1517,6 @@ nfs_remove(struct vop_remove_args *ap)
int
nfs_removeit(struct sillyrename *sp)
{
/*
* Make sure that the directory vnode is still valid.
* XXX we should lock sp->s_dvp here.
@ -1469,9 +1551,11 @@ nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
nfsm_wcc_data(dvp, wccflag);
m_freem(mrep);
nfsmout:
mtx_lock(&(VTONFS(dvp))->n_mtx);
VTONFS(dvp)->n_flag |= NMODIFIED;
if (!wccflag)
VTONFS(dvp)->n_attrstamp = 0;
mtx_unlock(&(VTONFS(dvp))->n_mtx);
return (error);
}
@ -1502,7 +1586,7 @@ nfs_rename(struct vop_rename_args *ap)
}
if (fvp == tvp) {
printf("nfs_rename: fvp == tvp (can't happen)\n");
nfs_printf("nfs_rename: fvp == tvp (can't happen)\n");
error = 0;
goto out;
}
@ -1609,8 +1693,12 @@ nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
}
m_freem(mrep);
nfsmout:
mtx_lock(&(VTONFS(fdvp))->n_mtx);
VTONFS(fdvp)->n_flag |= NMODIFIED;
mtx_unlock(&(VTONFS(fdvp))->n_mtx);
mtx_lock(&(VTONFS(tdvp))->n_mtx);
VTONFS(tdvp)->n_flag |= NMODIFIED;
mtx_unlock(&(VTONFS(tdvp))->n_mtx);
if (!fwccflag)
VTONFS(fdvp)->n_attrstamp = 0;
if (!twccflag)
@ -1659,7 +1747,9 @@ nfs_link(struct vop_link_args *ap)
}
m_freem(mrep);
nfsmout:
mtx_lock(&(VTONFS(tdvp))->n_mtx);
VTONFS(tdvp)->n_flag |= NMODIFIED;
mtx_unlock(&(VTONFS(tdvp))->n_mtx);
if (!attrflag)
VTONFS(vp)->n_attrstamp = 0;
if (!wccflag)
@ -1758,7 +1848,9 @@ nfs_symlink(struct vop_symlink_args *ap)
} else {
*ap->a_vpp = newvp;
}
mtx_lock(&(VTONFS(dvp))->n_mtx);
VTONFS(dvp)->n_flag |= NMODIFIED;
mtx_unlock(&(VTONFS(dvp))->n_mtx);
if (!wccflag)
VTONFS(dvp)->n_attrstamp = 0;
return (error);
@ -1813,7 +1905,9 @@ nfs_mkdir(struct vop_mkdir_args *ap)
nfsm_wcc_data(dvp, wccflag);
m_freem(mrep);
nfsmout:
mtx_lock(&(VTONFS(dvp))->n_mtx);
VTONFS(dvp)->n_flag |= NMODIFIED;
mtx_unlock(&(VTONFS(dvp))->n_mtx);
if (!wccflag)
VTONFS(dvp)->n_attrstamp = 0;
/*
@ -1869,7 +1963,9 @@ nfs_rmdir(struct vop_rmdir_args *ap)
nfsm_wcc_data(dvp, wccflag);
m_freem(mrep);
nfsmout:
mtx_lock(&(VTONFS(dvp))->n_mtx);
VTONFS(dvp)->n_flag |= NMODIFIED;
mtx_unlock(&(VTONFS(dvp))->n_mtx);
if (!wccflag)
VTONFS(dvp)->n_attrstamp = 0;
cache_purge(dvp);
@ -1891,20 +1987,25 @@ nfs_readdir(struct vop_readdir_args *ap)
struct vnode *vp = ap->a_vp;
struct nfsnode *np = VTONFS(vp);
struct uio *uio = ap->a_uio;
int tresid, error;
int tresid, error = 0;
struct vattr vattr;
if (vp->v_type != VDIR)
return(EPERM);
if (vp->v_type != VDIR)
return (EPERM);
/*
* First, check for hit on the EOF offset cache
*/
if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
(np->n_flag & NMODIFIED) == 0) {
if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_td) == 0 &&
!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
nfsstats.direofcache_hits++;
return (0);
if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_td) == 0) {
mtx_lock(&np->n_mtx);
if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
mtx_unlock(&np->n_mtx);
nfsstats.direofcache_hits++;
goto out;
} else
mtx_unlock(&np->n_mtx);
}
}
@ -1914,8 +2015,10 @@ nfs_readdir(struct vop_readdir_args *ap)
tresid = uio->uio_resid;
error = nfs_bioread(vp, uio, 0, ap->a_cred);
if (!error && uio->uio_resid == tresid)
if (!error && uio->uio_resid == tresid) {
nfsstats.direofcache_misses++;
}
out:
return (error);
}
@ -1950,11 +2053,16 @@ nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
/*
* If there is no cookie, assume directory was stale.
*/
nfs_dircookie_lock(dnp);
cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
if (cookiep)
if (cookiep) {
cookie = *cookiep;
else
nfs_dircookie_unlock(dnp);
} else {
nfs_dircookie_unlock(dnp);
return (NFSERR_BAD_COOKIE);
}
/*
* Loop around doing readdir rpc's of size nm_readdirsize
* truncated to a multiple of DIRBLKSIZ.
@ -1971,8 +2079,10 @@ nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
*tl++ = cookie.nfsuquad[0];
*tl++ = cookie.nfsuquad[1];
mtx_lock(&dnp->n_mtx);
*tl++ = dnp->n_cookieverf.nfsuquad[0];
*tl++ = dnp->n_cookieverf.nfsuquad[1];
mtx_unlock(&dnp->n_mtx);
} else {
tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
*tl++ = cookie.nfsuquad[0];
@ -1984,8 +2094,10 @@ nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
if (!error) {
tl = nfsm_dissect(u_int32_t *,
2 * NFSX_UNSIGNED);
mtx_lock(&dnp->n_mtx);
dnp->n_cookieverf.nfsuquad[0] = *tl++;
dnp->n_cookieverf.nfsuquad[1] = *tl;
mtx_unlock(&dnp->n_mtx);
} else {
m_freem(mrep);
goto nfsmout;
@ -2100,9 +2212,11 @@ nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
dnp->n_direofoffset = uiop->uio_offset;
else {
if (uiop->uio_resid > 0)
printf("EEK! readdirrpc resid > 0\n");
nfs_printf("EEK! readdirrpc resid > 0\n");
nfs_dircookie_lock(dnp);
cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
*cookiep = cookie;
nfs_dircookie_unlock(dnp);
}
nfsmout:
return (error);
@ -2146,11 +2260,15 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
/*
* If there is no cookie, assume directory was stale.
*/
nfs_dircookie_lock(dnp);
cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
if (cookiep)
if (cookiep) {
cookie = *cookiep;
else
nfs_dircookie_unlock(dnp);
} else {
nfs_dircookie_unlock(dnp);
return (NFSERR_BAD_COOKIE);
}
/*
* Loop around doing readdir rpc's of size nm_readdirsize
* truncated to a multiple of DIRBLKSIZ.
@ -2166,8 +2284,10 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED);
*tl++ = cookie.nfsuquad[0];
*tl++ = cookie.nfsuquad[1];
mtx_lock(&dnp->n_mtx);
*tl++ = dnp->n_cookieverf.nfsuquad[0];
*tl++ = dnp->n_cookieverf.nfsuquad[1];
mtx_unlock(&dnp->n_mtx);
*tl++ = txdr_unsigned(nmp->nm_readdirsize);
*tl = txdr_unsigned(nmp->nm_rsize);
nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred);
@ -2177,8 +2297,10 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
goto nfsmout;
}
tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
mtx_lock(&dnp->n_mtx);
dnp->n_cookieverf.nfsuquad[0] = *tl++;
dnp->n_cookieverf.nfsuquad[1] = *tl++;
mtx_unlock(&dnp->n_mtx);
more_dirs = fxdr_unsigned(int, *tl);
/* loop thru the dir entries, doctoring them to 4bsd form */
@ -2313,9 +2435,9 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
i = fxdr_unsigned(int, *tl);
if (i) {
tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
fhsize = fxdr_unsigned(int, *tl);
nfsm_adv(nfsm_rndup(fhsize));
tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
fhsize = fxdr_unsigned(int, *tl);
nfsm_adv(nfsm_rndup(fhsize));
}
}
if (newvp != NULLVP) {
@ -2359,9 +2481,11 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
dnp->n_direofoffset = uiop->uio_offset;
else {
if (uiop->uio_resid > 0)
printf("EEK! readdirplusrpc resid > 0\n");
nfs_printf("EEK! readdirplusrpc resid > 0\n");
nfs_dircookie_lock(dnp);
cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
*cookiep = cookie;
nfs_dircookie_unlock(dnp);
}
nfsmout:
if (newvp != NULLVP) {
@ -2521,7 +2645,7 @@ nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
*/
int
nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
struct thread *td)
struct thread *td)
{
u_int32_t *tl;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
@ -2529,8 +2653,12 @@ nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
int error = 0, wccflag = NFSV3_WCCRATTR;
struct mbuf *mreq, *mrep, *md, *mb;
if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0)
mtx_lock(&nmp->nm_mtx);
if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
mtx_unlock(&nmp->nm_mtx);
return (0);
}
mtx_unlock(&nmp->nm_mtx);
nfsstats.rpccnt[NFSPROC_COMMIT]++;
mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
mb = mreq;
@ -2600,7 +2728,6 @@ nfs_strategy(struct vop_strategy_args *ap)
static int
nfs_fsync(struct vop_fsync_args *ap)
{
return (nfs_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1));
}
@ -2821,8 +2948,10 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
VI_MTX(vp), "nfsfsync", slpflag, slptimeo);
splx(s);
if (error == 0)
panic("nfs_fsync: inconsistent lock");
if (error == 0) {
BUF_UNLOCK(bp);
goto loop;
}
if (error == ENOLCK)
goto loop;
if (nfs_sigintr(nmp, NULL, td)) {
@ -2880,23 +3009,28 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
VI_UNLOCK(vp);
goto loop;
}
/*
* Wait for all the async IO requests to drain
/*
* Wait for all the async IO requests to drain
*/
VI_UNLOCK(vp);
mtx_lock(&np->n_mtx);
while (np->n_directio_asyncwr > 0) {
np->n_flag |= NFSYNCWAIT;
error = nfs_tsleep(td, (caddr_t)&np->n_directio_asyncwr,
slpflag | (PRIBIO + 1), "nfsfsync", 0);
error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr,
&np->n_mtx, slpflag | (PRIBIO + 1),
"nfsfsync", 0);
if (error) {
if (nfs_sigintr(nmp, (struct nfsreq *)0, td)) {
error = EINTR;
mtx_unlock(&np->n_mtx);
error = EINTR;
goto done;
}
}
}
}
VI_UNLOCK(vp);
mtx_unlock(&np->n_mtx);
} else
VI_UNLOCK(vp);
mtx_lock(&np->n_mtx);
if (np->n_flag & NWRITEERR) {
error = np->n_error;
np->n_flag &= ~NWRITEERR;
@ -2904,6 +3038,7 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
if (commit && vp->v_bufobj.bo_dirty.bv_cnt == 0 &&
vp->v_bufobj.bo_numoutput == 0 && np->n_directio_asyncwr == 0)
np->n_flag &= ~NMODIFIED;
mtx_unlock(&np->n_mtx);
done:
if (bvec != NULL && bvec != bvec_on_stack)
free(bvec, M_TEMP);
@ -2916,13 +3051,19 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
static int
nfs_advlock(struct vop_advlock_args *ap)
{
int error;
mtx_lock(&Giant);
if ((VFSTONFS(ap->a_vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
struct nfsnode *np = VTONFS(ap->a_vp);
return (lf_advlock(ap, &(np->n_lockf), np->n_size));
error = lf_advlock(ap, &(np->n_lockf), np->n_size);
goto out;
}
return (nfs_dolock(ap));
error = nfs_dolock(ap);
out:
mtx_unlock(&Giant);
return (error);
}
/*
@ -2934,7 +3075,7 @@ nfs_print(struct vop_print_args *ap)
struct vnode *vp = ap->a_vp;
struct nfsnode *np = VTONFS(vp);
printf("\tfileid %ld fsid 0x%x",
nfs_printf("\tfileid %ld fsid 0x%x",
np->n_vattr.va_fileid, np->n_vattr.va_fsid);
if (vp->v_type == VFIFO)
fifo_printinfo(vp);
@ -2998,7 +3139,6 @@ nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
reassignbuf(bp);
splx(s);
}
brelse(bp);
return (rtval);
}
@ -3039,9 +3179,11 @@ nfsspec_access(struct vop_access_args *ap)
vap = &vattr;
error = VOP_GETATTR(vp, vap, cred, ap->a_td);
if (error)
return (error);
return (vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
mode, cred, NULL));
goto out;
error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
mode, cred, NULL);
out:
return error;
}
/*
@ -3051,13 +3193,17 @@ static int
nfsfifo_read(struct vop_read_args *ap)
{
struct nfsnode *np = VTONFS(ap->a_vp);
int error;
/*
* Set access flag.
*/
mtx_lock(&np->n_mtx);
np->n_flag |= NACC;
getnanotime(&np->n_atim);
return (fifo_specops.vop_read(ap));
mtx_unlock(&np->n_mtx);
error = fifo_specops.vop_read(ap);
return error;
}
/*
@ -3071,9 +3217,11 @@ nfsfifo_write(struct vop_write_args *ap)
/*
* Set update flag.
*/
mtx_lock(&np->n_mtx);
np->n_flag |= NUPD;
getnanotime(&np->n_mtim);
return (fifo_specops.vop_write(ap));
mtx_unlock(&np->n_mtx);
return (fifo_specops.vop_write(ap));
}
/*
@ -3089,6 +3237,7 @@ nfsfifo_close(struct vop_close_args *ap)
struct vattr vattr;
struct timespec ts;
mtx_lock(&np->n_mtx);
if (np->n_flag & (NACC | NUPD)) {
getnanotime(&ts);
if (np->n_flag & NACC)
@ -3103,9 +3252,13 @@ nfsfifo_close(struct vop_close_args *ap)
vattr.va_atime = np->n_atim;
if (np->n_flag & NUPD)
vattr.va_mtime = np->n_mtim;
mtx_unlock(&np->n_mtx);
(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_td);
goto out;
}
}
mtx_unlock(&np->n_mtx);
out:
return (fifo_specops.vop_close(ap));
}

View File

@ -41,7 +41,6 @@ struct nfs_tcp_mountstate {
#define NFS_TCP_EXPECT_RPCMARKER 0x0001 /* Expect to see a RPC/TCP marker next */
#define NFS_TCP_FORCE_RECONNECT 0x0002 /* Force a TCP reconnect */
int flags;
struct mtx mtx;
};
/*
@ -50,6 +49,7 @@ struct nfs_tcp_mountstate {
* Holds NFS specific information for mount.
*/
struct nfsmount {
struct mtx nm_mtx;
int nm_flag; /* Flags for soft/hard... */
int nm_state; /* Internal state flags */
struct mount *nm_mountp; /* Vfs structure for this filesystem */

View File

@ -88,6 +88,7 @@ struct nfsdmap {
* be well aligned and, therefore, tightly packed.
*/
struct nfsnode {
struct mtx n_mtx; /* Protects all of these members */
u_quad_t n_size; /* Current size of file */
u_quad_t n_brev; /* Modify rev when cached */
u_quad_t n_lrev; /* Modify rev for lease */
@ -124,9 +125,8 @@ struct nfsnode {
struct nfs4_fctx n_wfc;
u_char *n_name; /* leaf name, for v4 OPEN op */
uint32_t n_namelen;
daddr_t ra_expect_lbn;
int n_directio_opens;
int n_directio_asyncwr;
int n_directio_asyncwr;
};
#define n_atim n_un1.nf_atim
@ -140,6 +140,8 @@ struct nfsnode {
/*
* Flags for n_flag
*/
#define NFSYNCWAIT 0x0002 /* fsync waiting for all directio async writes
to drain */
#define NMODIFIED 0x0004 /* Might have a modified buffer in bio */
#define NWRITEERR 0x0008 /* Flag write errors so close will know */
/* 0x20, 0x40, 0x80 free */
@ -150,8 +152,7 @@ struct nfsnode {
#define NTRUNCATE 0x1000 /* Opened by nfs_setattr() */
#define NSIZECHANGED 0x2000 /* File size has changed: need cache inval */
#define NNONCACHE 0x4000 /* Node marked as noncacheable */
#define NFSYNCWAIT 0x8000 /* fsync waiting for all directio async writes
to drain */
#define NDIRCOOKIELK 0x8000 /* Lock to serialize access to directory cookies */
/*
* Convert between nfsnode pointers and vnode pointers
@ -193,6 +194,12 @@ nfsuint64 *nfs_getcookie(struct nfsnode *, off_t, int);
uint64_t *nfs4_getcookie(struct nfsnode *, off_t, int);
void nfs_invaldir(struct vnode *);
void nfs4_invaldir(struct vnode *);
int nfs_upgrade_vnlock(struct vnode *vp, struct thread *td);
void nfs_downgrade_vnlock(struct vnode *vp, struct thread *td, int old_lock);
void nfs_printf(const char *fmt, ...);
void nfs_dircookie_lock(struct nfsnode *np);
void nfs_dircookie_unlock(struct nfsnode *np);
#endif /* _KERNEL */