diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c index 147f1905e222..e1a74536c498 100644 --- a/sys/nfsclient/nfs_bio.c +++ b/sys/nfsclient/nfs_bio.c @@ -1714,6 +1714,19 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td) * the vp's paging queues so we cannot call bdirty(). The * bp in this case is not an NFS cache block so we should * be safe. XXX + * + * The logic below breaks up errors into recoverable and + * unrecoverable. For the former, we clear B_INVAL|B_NOCACHE + * and keep the buffer around for potential write retries. + * For the latter (e.g. ESTALE), we toss the buffer away (B_INVAL) + * and save the error in the nfsnode. This is less than ideal + * but necessary. Keeping such buffers around could potentially + * cause buffer exhaustion eventually (they can never be written + * out, so will constantly be re-dirtied). It also causes + * all sorts of vfs panics. For non-recoverable write errors, + * also invalidate the attrcache, so we'll be forced to go over + * the wire for this object, returning an error to user on next + * call (most of the time). */ if (error == EINTR || error == EIO || error == ETIMEDOUT || (!error && (bp->b_flags & B_NEEDCOMMIT))) { @@ -1731,9 +1744,11 @@ nfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td) } else { if (error) { bp->b_ioflags |= BIO_ERROR; + bp->b_flags |= B_INVAL; bp->b_error = np->n_error = error; mtx_lock(&np->n_mtx); np->n_flag |= NWRITEERR; + np->n_attrstamp = 0; mtx_unlock(&np->n_mtx); } bp->b_dirtyoff = bp->b_dirtyend = 0;