nfsd: Add support for the NFSv4.2 Deallocate operation

The recently added VOP_DEALLOCATE(9) VOP call allows
implementation of the Deallocate NFSv4.2 operation.

Since the Deallocate operation is a single succeed/fail
operation, the call to VOP_DEALLOCATE(9) loops so long
as progress is being made.  It calls maybe_yield()
between loop iterations to allow other processes
to preempt it.

Where RFC 7862 underspecifies behaviour, the code
is written to be Linux NFSv4.2 server compatible.

Reviewed by:	khng
Differential Revision:	https://reviews.freebsd.org/D31624
This commit is contained in:
Rick Macklem 2021-08-26 18:14:11 -07:00
parent c5af0ac1a7
commit bb958dcf3d
4 changed files with 311 additions and 1 deletions

View File

@ -282,6 +282,8 @@ int nfsrvd_teststateid(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_allocate(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_deallocate(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_copy_file_range(struct nfsrv_descript *, int,
vnode_t, vnode_t, struct nfsexstuff *, struct nfsexstuff *);
int nfsrvd_seek(struct nfsrv_descript *, int,
@ -752,6 +754,8 @@ int nfsrv_setacl(struct vnode *, NFSACL_T *, struct ucred *, NFSPROC_T *);
int nfsvno_seek(struct nfsrv_descript *, struct vnode *, u_long, off_t *, int,
bool *, struct ucred *, NFSPROC_T *);
int nfsvno_allocate(struct vnode *, off_t, off_t, struct ucred *, NFSPROC_T *);
int nfsvno_deallocate(struct vnode *, off_t, off_t, struct ucred *,
NFSPROC_T *);
int nfsvno_getxattr(struct vnode *, char *, uint32_t, struct ucred *,
uint64_t, int, struct thread *, struct mbuf **, struct mbuf **, int *);
int nfsvno_setxattr(struct vnode *, char *, int, struct mbuf *, char *,

View File

@ -132,6 +132,8 @@ static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *,
char *, int *);
static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
static int nfsrv_deallocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
struct vnode *, struct nfsmount **, int, struct acl *, int *);
static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
@ -4898,6 +4900,9 @@ tryagain:
} else if (ioproc == NFSPROC_ALLOCATE)
error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp,
&nmp[0], mirrorcnt, &failpos);
else if (ioproc == NFSPROC_DEALLOCATE)
error = nfsrv_deallocatedsrpc(fh, off, *offp, cred, p,
vp, &nmp[0], mirrorcnt, &failpos);
else {
error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p,
vp, nmp[mirrorcnt - 1], nap);
@ -5679,6 +5684,166 @@ nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
return (error);
}
/*
* Do a deallocate RPC on a DS data file, using this structure for the
* arguments, so that this function can be executed by a separate kernel
* process.
*/
struct nfsrvdeallocatedsdorpc {
int done;
int inprog;
struct task tsk;
fhandle_t fh;
off_t off;
off_t len;
struct nfsmount *nmp;
struct ucred *cred;
NFSPROC_T *p;
int err;
};
static int
nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p)
{
uint32_t *tl;
struct nfsrv_descript *nd;
nfsattrbit_t attrbits;
nfsv4stateid_t st;
int error;
nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
nfscl_reqstart(nd, NFSPROC_DEALLOCATE, nmp, (u_int8_t *)fhp,
sizeof(fhandle_t), NULL, NULL, 0, 0);
/*
* Use a stateid where other is an alternating 01010 pattern and
* seqid is 0xffffffff. This value is not defined as special by
* the RFC and is used by the FreeBSD NFS server to indicate an
* MDS->DS proxy operation.
*/
st.other[0] = 0x55555555;
st.other[1] = 0x55555555;
st.other[2] = 0x55555555;
st.seqid = 0xffffffff;
nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
txdr_hyper(off, tl); tl += 2;
txdr_hyper(len, tl); tl += 2;
NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: len=%jd\n", (intmax_t)len);
*tl = txdr_unsigned(NFSV4OP_GETATTR);
NFSGETATTR_ATTRBIT(&attrbits);
nfsrv_putattrbit(nd, &attrbits);
error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
if (error != 0) {
free(nd, M_TEMP);
return (error);
}
NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft allocaterpc=%d\n",
nd->nd_repstat);
if (nd->nd_repstat == 0) {
NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
} else
error = nd->nd_repstat;
NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error);
nfsmout:
m_freem(nd->nd_mrep);
free(nd, M_TEMP);
NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc error=%d\n", error);
return (error);
}
/*
* Start up the thread that will execute nfsrv_deallocatedsdorpc().
*/
static void
start_deallocatedsdorpc(void *arg, int pending)
{
struct nfsrvdeallocatedsdorpc *drpc;
drpc = (struct nfsrvdeallocatedsdorpc *)arg;
drpc->err = nfsrv_deallocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
drpc->len, NULL, drpc->cred, drpc->p);
drpc->done = 1;
NFSD_DEBUG(4, "start_deallocatedsdorpc: err=%d\n", drpc->err);
}
static int
nfsrv_deallocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
int *failposp)
{
struct nfsrvdeallocatedsdorpc *drpc, *tdrpc = NULL;
struct nfsvattr na;
int error, i, ret, timo;
NFSD_DEBUG(4, "in nfsrv_deallocatedsrpc\n");
drpc = NULL;
if (mirrorcnt > 1)
tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
M_WAITOK);
/*
* Do the deallocate RPC for every DS, using a separate kernel process
* for every DS except the last one.
*/
error = 0;
for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
tdrpc->done = 0;
NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
tdrpc->off = off;
tdrpc->len = len;
tdrpc->nmp = *nmpp;
tdrpc->cred = cred;
tdrpc->p = p;
tdrpc->inprog = 0;
tdrpc->err = 0;
ret = EIO;
if (nfs_pnfsiothreads != 0) {
ret = nfs_pnfsio(start_deallocatedsdorpc, tdrpc);
NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: nfs_pnfsio=%d\n",
ret);
}
if (ret != 0) {
ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len,
NULL, cred, p);
if (nfsds_failerr(ret) && *failposp == -1)
*failposp = i;
else if (error == 0 && ret != 0)
error = ret;
}
nmpp++;
fhp++;
}
ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p);
if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
*failposp = mirrorcnt - 1;
else if (error == 0 && ret != 0)
error = ret;
if (error == 0)
error = nfsrv_setextattr(vp, &na, p);
NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: aft setextat=%d\n", error);
tdrpc = drpc;
timo = hz / 50; /* Wait for 20msec. */
if (timo < 1)
timo = 1;
for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
/* Wait for RPCs on separate threads to complete. */
while (tdrpc->inprog != 0 && tdrpc->done == 0)
tsleep(&tdrpc->tsk, PVFS, "srvalds", timo);
if (nfsds_failerr(tdrpc->err) && *failposp == -1)
*failposp = i;
else if (error == 0 && tdrpc->err != 0)
error = tdrpc->err;
}
free(drpc, M_TEMP);
return (error);
}
static int
nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap,
@ -6425,6 +6590,42 @@ nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
return (error);
}
/*
* Deallocate vnode op call.
*/
int
nfsvno_deallocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
NFSPROC_T *p)
{
int error;
off_t olen;
ASSERT_VOP_ELOCKED(vp, "nfsvno_deallocate vp");
/*
* Attempt to deallocate on a DS file. A return of ENOENT implies
* there is no DS file to deallocate on.
*/
error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_DEALLOCATE, NULL,
NULL, NULL, NULL, NULL, &len, 0, NULL);
if (error != ENOENT)
return (error);
/*
* Do the actual VOP_DEALLOCATE(), looping so long as
* progress is being made, to achieve completion.
*/
do {
olen = len;
error = VOP_DEALLOCATE(vp, &off, &len, 0, IO_SYNC, cred);
if (error == 0 && len > 0 && olen > len)
maybe_yield();
} while (error == 0 && len > 0 && olen > len);
if (error == 0 && len > 0)
error = NFSERR_IO;
NFSEXITCODE(error);
return (error);
}
/*
* Get Extended Atribute vnode op into an mbuf list.
*/

View File

@ -5389,6 +5389,111 @@ nfsmout:
return (error);
}
/*
* nfs deallocate service
*/
int
nfsrvd_deallocate(struct nfsrv_descript *nd, __unused int isdgram,
vnode_t vp, struct nfsexstuff *exp)
{
uint32_t *tl;
struct nfsvattr forat;
int error = 0, forat_ret = 1, gotproxystateid;
off_t off, len;
struct nfsstate st, *stp = &st;
struct nfslock lo, *lop = &lo;
nfsv4stateid_t stateid;
nfsquad_t clientid;
nfsattrbit_t attrbits;
gotproxystateid = 0;
NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER);
stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
lop->lo_flags = NFSLCK_WRITE;
stp->ls_ownerlen = 0;
stp->ls_op = NULL;
stp->ls_uid = nd->nd_cred->cr_uid;
stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
clientid.lval[0] = stp->ls_stateid.other[0] = *tl++;
clientid.lval[1] = stp->ls_stateid.other[1] = *tl++;
if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
if ((nd->nd_flag & ND_NFSV41) != 0)
clientid.qval = nd->nd_clientid.qval;
else if (nd->nd_clientid.qval != clientid.qval)
printf("EEK2 multiple clids\n");
} else {
if ((nd->nd_flag & ND_NFSV41) != 0)
printf("EEK! no clientid from session\n");
nd->nd_flag |= ND_IMPLIEDCLID;
nd->nd_clientid.qval = clientid.qval;
}
stp->ls_stateid.other[2] = *tl++;
/*
* Don't allow this to be done for a DS.
*/
if ((nd->nd_flag & ND_DSSERVER) != 0)
nd->nd_repstat = NFSERR_NOTSUPP;
/* However, allow the proxy stateid. */
if (stp->ls_stateid.seqid == 0xffffffff &&
stp->ls_stateid.other[0] == 0x55555555 &&
stp->ls_stateid.other[1] == 0x55555555 &&
stp->ls_stateid.other[2] == 0x55555555)
gotproxystateid = 1;
off = fxdr_hyper(tl); tl += 2;
lop->lo_first = off;
len = fxdr_hyper(tl);
if (len < 0)
len = OFF_MAX;
NFSD_DEBUG(4, "dealloc: off=%jd len=%jd\n", (intmax_t)off,
(intmax_t)len);
lop->lo_end = lop->lo_first + len;
/*
* Sanity check the offset and length.
* off and len are off_t (signed int64_t) whereas
* lo_first and lo_end are uint64_t and, as such,
* if off >= 0 && len > 0, lo_end cannot overflow
* unless off_t is changed to something other than
* int64_t. Check lo_end < lo_first in case that
* is someday the case.
* The error to return is not specified by RFC 7862 so I
* made this compatible with the Linux knfsd.
*/
if (nd->nd_repstat == 0) {
if (off < 0 || lop->lo_end > NFSRV_MAXFILESIZE)
nd->nd_repstat = NFSERR_FBIG;
else if (len == 0 || lop->lo_end < lop->lo_first)
nd->nd_repstat = NFSERR_INVAL;
}
if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG)
nd->nd_repstat = NFSERR_WRONGTYPE;
NFSZERO_ATTRBIT(&attrbits);
NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER);
forat_ret = nfsvno_getattr(vp, &forat, nd, curthread, 1, &attrbits);
if (nd->nd_repstat == 0)
nd->nd_repstat = forat_ret;
if (nd->nd_repstat == 0 && (forat.na_uid != nd->nd_cred->cr_uid ||
NFSVNO_EXSTRICTACCESS(exp)))
nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp,
curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED,
NULL);
if (nd->nd_repstat == 0 && gotproxystateid == 0)
nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
&stateid, exp, nd, curthread);
if (nd->nd_repstat == 0)
nd->nd_repstat = nfsvno_deallocate(vp, off, len, nd->nd_cred,
curthread);
vput(vp);
NFSD_DEBUG(4, "eo deallocate=%d\n", nd->nd_repstat);
NFSEXITCODE2(0, nd);
return (0);
nfsmout:
vput(vp);
NFSEXITCODE2(error, nd);
return (error);
}
/*
* nfs copy service
*/

View File

@ -198,7 +198,7 @@ int (*nfsrv4_ops0[NFSV42_NOPS])(struct nfsrv_descript *,
nfsrvd_allocate,
(int (*)(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *))0,
nfsrvd_notsupp,
nfsrvd_notsupp,
nfsrvd_deallocate,
nfsrvd_ioadvise,
nfsrvd_layouterror,
nfsrvd_layoutstats,