2009-05-04 15:23:58 +00:00
|
|
|
/*-
|
|
|
|
* Copyright (c) 1989, 1993
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
*
|
|
|
|
* This code is derived from software contributed to Berkeley by
|
|
|
|
* Rick Macklem at The University of Guelph.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
2014-03-16 10:55:57 +00:00
|
|
|
#include <sys/capsicum.h>
|
2011-08-11 12:30:23 +00:00
|
|
|
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* Functions that perform the vfs operations required by the routines in
|
|
|
|
* nfsd_serv.c. It is hoped that this change will make the server more
|
|
|
|
* portable.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <fs/nfs/nfsport.h>
|
2010-10-23 22:28:29 +00:00
|
|
|
#include <sys/hash.h>
|
2009-05-04 15:23:58 +00:00
|
|
|
#include <sys/sysctl.h>
|
2009-05-21 01:50:27 +00:00
|
|
|
#include <nlm/nlm_prot.h>
|
|
|
|
#include <nlm/nlm.h>
|
2009-05-04 15:23:58 +00:00
|
|
|
|
2011-02-25 10:11:01 +00:00
|
|
|
FEATURE(nfsd, "NFSv4 server");
|
|
|
|
|
2009-05-04 15:23:58 +00:00
|
|
|
extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
|
|
|
|
extern int nfsrv_useacl;
|
|
|
|
extern int newnfs_numnfsd;
|
|
|
|
extern struct mount nfsv4root_mnt;
|
|
|
|
extern struct nfsrv_stablefirst nfsrv_stablefirst;
|
|
|
|
extern void (*nfsd_call_servertimer)(void);
|
2011-04-10 20:43:07 +00:00
|
|
|
extern SVCPOOL *nfsrvd_pool;
|
2012-10-14 22:33:17 +00:00
|
|
|
extern struct nfsv4lock nfsd_suspend_lock;
|
2009-05-04 15:23:58 +00:00
|
|
|
struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
|
|
|
|
NFSDLOCKMUTEX;
|
2013-08-14 21:11:26 +00:00
|
|
|
struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
|
2014-01-03 15:09:59 +00:00
|
|
|
struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
|
2013-08-14 21:11:26 +00:00
|
|
|
struct mtx nfsrc_udpmtx;
|
2009-05-04 15:23:58 +00:00
|
|
|
struct mtx nfs_v4root_mutex;
|
|
|
|
struct nfsrvfh nfs_rootfh, nfs_pubfh;
|
|
|
|
int nfs_pubfhset = 0, nfs_rootfhset = 0;
|
2011-01-14 23:30:35 +00:00
|
|
|
struct proc *nfsd_master_proc = NULL;
|
|
|
|
static pid_t nfsd_master_pid = (pid_t)-1;
|
|
|
|
static char nfsd_master_comm[MAXCOMLEN + 1];
|
|
|
|
static struct timeval nfsd_master_start;
|
2009-05-21 01:50:27 +00:00
|
|
|
static uint32_t nfsv4_sysid = 0;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
2009-05-17 17:54:01 +00:00
|
|
|
static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
|
|
|
|
struct ucred *);
|
2009-05-04 15:23:58 +00:00
|
|
|
|
2011-04-14 21:49:52 +00:00
|
|
|
int nfsrv_enable_crossmntpt = 1;
|
2009-05-04 15:23:58 +00:00
|
|
|
static int nfs_commit_blks;
|
|
|
|
static int nfs_commit_miss;
|
|
|
|
extern int nfsrv_issuedelegs;
|
2009-05-17 17:54:01 +00:00
|
|
|
extern int nfsrv_dolocallocks;
|
|
|
|
|
2011-05-08 01:01:27 +00:00
|
|
|
SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "New NFS server");
|
|
|
|
SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
|
2011-04-14 21:49:52 +00:00
|
|
|
&nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
|
2011-05-08 01:01:27 +00:00
|
|
|
SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
|
2009-05-17 17:54:01 +00:00
|
|
|
0, "");
|
2011-05-08 01:01:27 +00:00
|
|
|
SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
|
2009-05-17 17:54:01 +00:00
|
|
|
0, "");
|
2011-05-08 01:01:27 +00:00
|
|
|
SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
|
2009-05-17 17:54:01 +00:00
|
|
|
&nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
|
2011-05-08 01:01:27 +00:00
|
|
|
SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
|
2009-05-17 17:54:01 +00:00
|
|
|
&nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
|
2009-05-04 15:23:58 +00:00
|
|
|
|
2011-12-01 18:46:28 +00:00
|
|
|
#define MAX_REORDERED_RPC 16
|
|
|
|
#define NUM_HEURISTIC 1031
|
2009-05-04 15:23:58 +00:00
|
|
|
#define NHUSE_INIT 64
|
|
|
|
#define NHUSE_INC 16
|
|
|
|
#define NHUSE_MAX 2048
|
|
|
|
|
|
|
|
static struct nfsheur {
|
|
|
|
struct vnode *nh_vp; /* vp to match (unreferenced pointer) */
|
2011-12-01 18:46:28 +00:00
|
|
|
off_t nh_nextoff; /* next offset for sequential detection */
|
2009-05-04 15:23:58 +00:00
|
|
|
int nh_use; /* use count for selection */
|
|
|
|
int nh_seqcount; /* heuristic */
|
|
|
|
} nfsheur[NUM_HEURISTIC];
|
|
|
|
|
|
|
|
|
2011-12-01 18:46:28 +00:00
|
|
|
/*
|
|
|
|
* Heuristic to detect sequential operation.
|
|
|
|
*/
|
|
|
|
static struct nfsheur *
|
|
|
|
nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
|
|
|
|
{
|
|
|
|
struct nfsheur *nh;
|
|
|
|
int hi, try;
|
|
|
|
|
|
|
|
/* Locate best candidate. */
|
|
|
|
try = 32;
|
|
|
|
hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
|
|
|
|
nh = &nfsheur[hi];
|
|
|
|
while (try--) {
|
|
|
|
if (nfsheur[hi].nh_vp == vp) {
|
|
|
|
nh = &nfsheur[hi];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (nfsheur[hi].nh_use > 0)
|
|
|
|
--nfsheur[hi].nh_use;
|
|
|
|
hi = (hi + 1) % NUM_HEURISTIC;
|
|
|
|
if (nfsheur[hi].nh_use < nh->nh_use)
|
|
|
|
nh = &nfsheur[hi];
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialize hint if this is a new file. */
|
|
|
|
if (nh->nh_vp != vp) {
|
|
|
|
nh->nh_vp = vp;
|
|
|
|
nh->nh_nextoff = uio->uio_offset;
|
|
|
|
nh->nh_use = NHUSE_INIT;
|
|
|
|
if (uio->uio_offset == 0)
|
|
|
|
nh->nh_seqcount = 4;
|
|
|
|
else
|
|
|
|
nh->nh_seqcount = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Calculate heuristic. */
|
|
|
|
if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
|
|
|
|
uio->uio_offset == nh->nh_nextoff) {
|
|
|
|
/* See comments in vfs_vnops.c:sequential_heuristic(). */
|
|
|
|
nh->nh_seqcount += howmany(uio->uio_resid, 16384);
|
|
|
|
if (nh->nh_seqcount > IO_SEQMAX)
|
|
|
|
nh->nh_seqcount = IO_SEQMAX;
|
|
|
|
} else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
|
|
|
|
imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
|
|
|
|
/* Probably a reordered RPC, leave seqcount alone. */
|
|
|
|
} else if (nh->nh_seqcount > 1) {
|
|
|
|
nh->nh_seqcount /= 2;
|
|
|
|
} else {
|
|
|
|
nh->nh_seqcount = 0;
|
|
|
|
}
|
|
|
|
nh->nh_use += NHUSE_INC;
|
|
|
|
if (nh->nh_use > NHUSE_MAX)
|
|
|
|
nh->nh_use = NHUSE_MAX;
|
|
|
|
return (nh);
|
|
|
|
}
|
|
|
|
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* Get attributes into nfsvattr structure.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
|
2010-12-24 21:31:18 +00:00
|
|
|
struct thread *p, int vpislocked)
|
2009-05-04 15:23:58 +00:00
|
|
|
{
|
|
|
|
int error, lockedit = 0;
|
|
|
|
|
2010-12-24 21:31:18 +00:00
|
|
|
if (vpislocked == 0) {
|
|
|
|
/*
|
|
|
|
* When vpislocked == 0, the vnode is either exclusively
|
|
|
|
* locked by this thread or not locked by this thread.
|
|
|
|
* As such, shared lock it, if not exclusively locked.
|
|
|
|
*/
|
2011-07-16 08:05:41 +00:00
|
|
|
if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
|
2010-12-24 21:31:18 +00:00
|
|
|
lockedit = 1;
|
2011-07-16 08:05:31 +00:00
|
|
|
NFSVOPLOCK(vp, LK_SHARED | LK_RETRY);
|
2010-12-24 21:31:18 +00:00
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
error = VOP_GETATTR(vp, &nvap->na_vattr, cred);
|
2010-12-24 21:31:18 +00:00
|
|
|
if (lockedit != 0)
|
2011-07-16 08:05:36 +00:00
|
|
|
NFSVOPUNLOCK(vp, 0);
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get a file handle for a vnode.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
|
|
|
|
fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
|
|
|
|
error = VOP_VPTOFH(vp, &fhp->fh_fid);
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Perform access checking for vnodes obtained from file handles that would
|
|
|
|
* refer to files already opened by a Unix client. You cannot just use
|
2009-12-25 20:44:19 +00:00
|
|
|
* vn_writechk() and VOP_ACCESSX() for two reasons.
|
|
|
|
* 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
|
|
|
|
* case.
|
2009-05-04 15:23:58 +00:00
|
|
|
* 2 - The owner is to be given access irrespective of mode bits for some
|
|
|
|
* operations, so that processes that chmod after opening a file don't
|
|
|
|
* break.
|
|
|
|
*/
|
|
|
|
int
|
2009-12-25 20:44:19 +00:00
|
|
|
nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
|
|
|
|
struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
|
|
|
|
u_int32_t *supportedtypep)
|
2009-05-04 15:23:58 +00:00
|
|
|
{
|
|
|
|
struct vattr vattr;
|
|
|
|
int error = 0, getret = 0;
|
|
|
|
|
2011-01-02 19:58:39 +00:00
|
|
|
if (vpislocked == 0) {
|
2011-07-16 08:51:09 +00:00
|
|
|
if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
|
|
|
|
error = EPERM;
|
|
|
|
goto out;
|
|
|
|
}
|
2011-01-02 19:58:39 +00:00
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
if (accmode & VWRITE) {
|
|
|
|
/* Just vn_writechk() changed to check rdonly */
|
|
|
|
/*
|
|
|
|
* Disallow write attempts on read-only file systems;
|
|
|
|
* unless the file is a socket or a block or character
|
|
|
|
* device resident on the file system.
|
|
|
|
*/
|
|
|
|
if (NFSVNO_EXRDONLY(exp) ||
|
2009-12-25 20:44:19 +00:00
|
|
|
(vp->v_mount->mnt_flag & MNT_RDONLY)) {
|
2009-05-04 15:23:58 +00:00
|
|
|
switch (vp->v_type) {
|
|
|
|
case VREG:
|
|
|
|
case VDIR:
|
|
|
|
case VLNK:
|
2011-01-02 19:58:39 +00:00
|
|
|
error = EROFS;
|
2009-05-04 15:23:58 +00:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If there's shared text associated with
|
|
|
|
* the inode, try to free it up once. If
|
|
|
|
* we fail, we can't allow writing.
|
|
|
|
*/
|
2012-09-28 11:25:02 +00:00
|
|
|
if (VOP_IS_TEXT(vp) && error == 0)
|
2011-01-02 19:58:39 +00:00
|
|
|
error = ETXTBSY;
|
|
|
|
}
|
|
|
|
if (error != 0) {
|
|
|
|
if (vpislocked == 0)
|
2011-07-16 08:05:36 +00:00
|
|
|
NFSVOPUNLOCK(vp, 0);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Should the override still be applied when ACLs are enabled?
|
|
|
|
*/
|
2009-12-25 20:44:19 +00:00
|
|
|
error = VOP_ACCESSX(vp, accmode, cred, p);
|
|
|
|
if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
|
|
|
|
/*
|
|
|
|
* Try again with VEXPLICIT_DENY, to see if the test for
|
|
|
|
* deletion is supported.
|
|
|
|
*/
|
|
|
|
error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
|
|
|
|
if (error == 0) {
|
|
|
|
if (vp->v_type == VDIR) {
|
|
|
|
accmode &= ~(VDELETE | VDELETE_CHILD);
|
|
|
|
accmode |= VWRITE;
|
|
|
|
error = VOP_ACCESSX(vp, accmode, cred, p);
|
|
|
|
} else if (supportedtypep != NULL) {
|
|
|
|
*supportedtypep &= ~NFSACCESS_DELETE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Allow certain operations for the owner (reads and writes
|
|
|
|
* on files that are already open).
|
|
|
|
*/
|
|
|
|
if (override != NFSACCCHK_NOOVERRIDE &&
|
|
|
|
(error == EPERM || error == EACCES)) {
|
|
|
|
if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
|
|
|
|
error = 0;
|
|
|
|
else if (override & NFSACCCHK_ALLOWOWNER) {
|
|
|
|
getret = VOP_GETATTR(vp, &vattr, cred);
|
|
|
|
if (getret == 0 && cred->cr_uid == vattr.va_uid)
|
|
|
|
error = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (vpislocked == 0)
|
2011-07-16 08:05:36 +00:00
|
|
|
NFSVOPUNLOCK(vp, 0);
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set attribute(s) vnop.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
|
|
|
|
struct thread *p, struct nfsexstuff *exp)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2012-02-16 02:19:53 +00:00
|
|
|
* Set up nameidata for a lookup() call and do it.
|
2009-05-04 15:23:58 +00:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
|
|
|
|
struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
|
|
|
|
struct vnode **retdirp)
|
|
|
|
{
|
|
|
|
struct componentname *cnp = &ndp->ni_cnd;
|
|
|
|
int i;
|
|
|
|
struct iovec aiov;
|
|
|
|
struct uio auio;
|
|
|
|
int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
|
|
|
|
int error = 0, crossmnt;
|
|
|
|
char *cp;
|
|
|
|
|
|
|
|
*retdirp = NULL;
|
|
|
|
cnp->cn_nameptr = cnp->cn_pnbuf;
|
2011-09-03 00:28:53 +00:00
|
|
|
ndp->ni_strictrelative = 0;
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* Extract and set starting directory.
|
|
|
|
*/
|
|
|
|
if (dp->v_type != VDIR) {
|
|
|
|
if (islocked)
|
|
|
|
vput(dp);
|
|
|
|
else
|
|
|
|
vrele(dp);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
2011-07-16 08:51:09 +00:00
|
|
|
error = ENOTDIR;
|
|
|
|
goto out1;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
if (islocked)
|
2011-07-16 08:05:26 +00:00
|
|
|
NFSVOPUNLOCK(dp, 0);
|
2009-05-04 15:23:58 +00:00
|
|
|
VREF(dp);
|
|
|
|
*retdirp = dp;
|
|
|
|
if (NFSVNO_EXRDONLY(exp))
|
|
|
|
cnp->cn_flags |= RDONLY;
|
|
|
|
ndp->ni_segflg = UIO_SYSSPACE;
|
|
|
|
crossmnt = 1;
|
|
|
|
|
|
|
|
if (nd->nd_flag & ND_PUBLOOKUP) {
|
|
|
|
ndp->ni_loopcnt = 0;
|
|
|
|
if (cnp->cn_pnbuf[0] == '/') {
|
|
|
|
vrele(dp);
|
|
|
|
/*
|
|
|
|
* Check for degenerate pathnames here, since lookup()
|
|
|
|
* panics on them.
|
|
|
|
*/
|
|
|
|
for (i = 1; i < ndp->ni_pathlen; i++)
|
|
|
|
if (cnp->cn_pnbuf[i] != '/')
|
|
|
|
break;
|
|
|
|
if (i == ndp->ni_pathlen) {
|
|
|
|
error = NFSERR_ACCES;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
dp = rootvnode;
|
|
|
|
VREF(dp);
|
|
|
|
}
|
2011-04-14 21:49:52 +00:00
|
|
|
} else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
|
2009-05-04 15:23:58 +00:00
|
|
|
(nd->nd_flag & ND_NFSV4) == 0) {
|
|
|
|
/*
|
|
|
|
* Only cross mount points for NFSv4 when doing a
|
|
|
|
* mount while traversing the file system above
|
2011-04-14 21:49:52 +00:00
|
|
|
* the mount point, unless nfsrv_enable_crossmntpt is set.
|
2009-05-04 15:23:58 +00:00
|
|
|
*/
|
|
|
|
cnp->cn_flags |= NOCROSSMOUNT;
|
|
|
|
crossmnt = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize for scan, set ni_startdir and bump ref on dp again
|
2013-05-15 08:38:49 +00:00
|
|
|
* because lookup() will dereference ni_startdir.
|
2009-05-04 15:23:58 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
cnp->cn_thread = p;
|
|
|
|
ndp->ni_startdir = dp;
|
|
|
|
ndp->ni_rootdir = rootvnode;
|
2012-03-03 16:13:20 +00:00
|
|
|
ndp->ni_topdir = NULL;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
if (!lockleaf)
|
|
|
|
cnp->cn_flags |= LOCKLEAF;
|
|
|
|
for (;;) {
|
|
|
|
cnp->cn_nameptr = cnp->cn_pnbuf;
|
|
|
|
/*
|
|
|
|
* Call lookup() to do the real work. If an error occurs,
|
|
|
|
* ndp->ni_vp and ni_dvp are left uninitialized or NULL and
|
|
|
|
* we do not have to dereference anything before returning.
|
|
|
|
* In either case ni_startdir will be dereferenced and NULLed
|
|
|
|
* out.
|
|
|
|
*/
|
|
|
|
error = lookup(ndp);
|
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check for encountering a symbolic link. Trivial
|
|
|
|
* termination occurs if no symlink encountered.
|
|
|
|
*/
|
|
|
|
if ((cnp->cn_flags & ISSYMLINK) == 0) {
|
|
|
|
if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
if (ndp->ni_vp && !lockleaf)
|
2011-07-16 08:05:26 +00:00
|
|
|
NFSVOPUNLOCK(ndp->ni_vp, 0);
|
2009-05-04 15:23:58 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Validate symlink
|
|
|
|
*/
|
|
|
|
if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
|
2011-07-16 08:05:26 +00:00
|
|
|
NFSVOPUNLOCK(ndp->ni_dvp, 0);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (!(nd->nd_flag & ND_PUBLOOKUP)) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto badlink2;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
|
|
|
|
error = ELOOP;
|
|
|
|
goto badlink2;
|
|
|
|
}
|
|
|
|
if (ndp->ni_pathlen > 1)
|
|
|
|
cp = uma_zalloc(namei_zone, M_WAITOK);
|
|
|
|
else
|
|
|
|
cp = cnp->cn_pnbuf;
|
|
|
|
aiov.iov_base = cp;
|
|
|
|
aiov.iov_len = MAXPATHLEN;
|
|
|
|
auio.uio_iov = &aiov;
|
|
|
|
auio.uio_iovcnt = 1;
|
|
|
|
auio.uio_offset = 0;
|
|
|
|
auio.uio_rw = UIO_READ;
|
|
|
|
auio.uio_segflg = UIO_SYSSPACE;
|
|
|
|
auio.uio_td = NULL;
|
|
|
|
auio.uio_resid = MAXPATHLEN;
|
|
|
|
error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
|
|
|
|
if (error) {
|
|
|
|
badlink1:
|
|
|
|
if (ndp->ni_pathlen > 1)
|
|
|
|
uma_zfree(namei_zone, cp);
|
|
|
|
badlink2:
|
|
|
|
vrele(ndp->ni_dvp);
|
|
|
|
vput(ndp->ni_vp);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
linklen = MAXPATHLEN - auio.uio_resid;
|
|
|
|
if (linklen == 0) {
|
|
|
|
error = ENOENT;
|
|
|
|
goto badlink1;
|
|
|
|
}
|
|
|
|
if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
|
|
|
|
error = ENAMETOOLONG;
|
|
|
|
goto badlink1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Adjust or replace path
|
|
|
|
*/
|
|
|
|
if (ndp->ni_pathlen > 1) {
|
|
|
|
NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
|
|
|
|
uma_zfree(namei_zone, cnp->cn_pnbuf);
|
|
|
|
cnp->cn_pnbuf = cp;
|
|
|
|
} else
|
|
|
|
cnp->cn_pnbuf[linklen] = '\0';
|
|
|
|
ndp->ni_pathlen += linklen;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Cleanup refs for next loop and check if root directory
|
|
|
|
* should replace current directory. Normally ni_dvp
|
|
|
|
* becomes the new base directory and is cleaned up when
|
|
|
|
* we loop. Explicitly null pointers after invalidation
|
|
|
|
* to clarify operation.
|
|
|
|
*/
|
|
|
|
vput(ndp->ni_vp);
|
|
|
|
ndp->ni_vp = NULL;
|
|
|
|
|
|
|
|
if (cnp->cn_pnbuf[0] == '/') {
|
|
|
|
vrele(ndp->ni_dvp);
|
|
|
|
ndp->ni_dvp = ndp->ni_rootdir;
|
|
|
|
VREF(ndp->ni_dvp);
|
|
|
|
}
|
|
|
|
ndp->ni_startdir = ndp->ni_dvp;
|
|
|
|
ndp->ni_dvp = NULL;
|
|
|
|
}
|
|
|
|
if (!lockleaf)
|
|
|
|
cnp->cn_flags &= ~LOCKLEAF;
|
|
|
|
|
|
|
|
out:
|
|
|
|
if (error) {
|
2012-05-08 03:39:44 +00:00
|
|
|
nfsvno_relpathbuf(ndp);
|
2009-05-04 15:23:58 +00:00
|
|
|
ndp->ni_vp = NULL;
|
|
|
|
ndp->ni_dvp = NULL;
|
|
|
|
ndp->ni_startdir = NULL;
|
|
|
|
} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
|
|
|
|
ndp->ni_dvp = NULL;
|
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out1:
|
|
|
|
NFSEXITCODE2(error, nd);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set up a pathname buffer and return a pointer to it and, optionally
|
|
|
|
* set a hash pointer.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
|
|
|
|
{
|
|
|
|
struct componentname *cnp = &ndp->ni_cnd;
|
|
|
|
|
|
|
|
cnp->cn_flags |= (NOMACCHECK | HASBUF);
|
|
|
|
cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
|
|
|
|
if (hashpp != NULL)
|
|
|
|
*hashpp = NULL;
|
|
|
|
*bufpp = cnp->cn_pnbuf;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Release the above path buffer, if not released by nfsvno_namei().
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
nfsvno_relpathbuf(struct nameidata *ndp)
|
|
|
|
{
|
|
|
|
|
|
|
|
if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
|
|
|
|
panic("nfsrelpath");
|
|
|
|
uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
|
|
|
|
ndp->ni_cnd.cn_flags &= ~HASBUF;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Readlink vnode op into an mbuf list.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
|
|
|
|
struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
|
|
|
|
{
|
|
|
|
struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
|
|
|
|
struct iovec *ivp = iv;
|
|
|
|
struct uio io, *uiop = &io;
|
|
|
|
struct mbuf *mp, *mp2 = NULL, *mp3 = NULL;
|
2011-07-16 08:51:09 +00:00
|
|
|
int i, len, tlen, error = 0;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
len = 0;
|
|
|
|
i = 0;
|
|
|
|
while (len < NFS_MAXPATHLEN) {
|
|
|
|
NFSMGET(mp);
|
2012-12-05 08:04:20 +00:00
|
|
|
MCLGET(mp, M_WAITOK);
|
2009-05-04 15:23:58 +00:00
|
|
|
mp->m_len = NFSMSIZ(mp);
|
|
|
|
if (len == 0) {
|
|
|
|
mp3 = mp2 = mp;
|
|
|
|
} else {
|
|
|
|
mp2->m_next = mp;
|
|
|
|
mp2 = mp;
|
|
|
|
}
|
|
|
|
if ((len + mp->m_len) > NFS_MAXPATHLEN) {
|
|
|
|
mp->m_len = NFS_MAXPATHLEN - len;
|
|
|
|
len = NFS_MAXPATHLEN;
|
|
|
|
} else {
|
|
|
|
len += mp->m_len;
|
|
|
|
}
|
|
|
|
ivp->iov_base = mtod(mp, caddr_t);
|
|
|
|
ivp->iov_len = mp->m_len;
|
|
|
|
i++;
|
|
|
|
ivp++;
|
|
|
|
}
|
|
|
|
uiop->uio_iov = iv;
|
|
|
|
uiop->uio_iovcnt = i;
|
|
|
|
uiop->uio_offset = 0;
|
|
|
|
uiop->uio_resid = len;
|
|
|
|
uiop->uio_rw = UIO_READ;
|
|
|
|
uiop->uio_segflg = UIO_SYSSPACE;
|
|
|
|
uiop->uio_td = NULL;
|
|
|
|
error = VOP_READLINK(vp, uiop, cred);
|
|
|
|
if (error) {
|
|
|
|
m_freem(mp3);
|
|
|
|
*lenp = 0;
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
if (uiop->uio_resid > 0) {
|
|
|
|
len -= uiop->uio_resid;
|
|
|
|
tlen = NFSM_RNDUP(len);
|
|
|
|
nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
|
|
|
|
}
|
|
|
|
*lenp = len;
|
|
|
|
*mpp = mp3;
|
|
|
|
*mpendp = mp;
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
|
|
|
return (error);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Read vnode op call into mbuf list.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
|
|
|
|
struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
|
|
|
|
{
|
|
|
|
struct mbuf *m;
|
|
|
|
int i;
|
|
|
|
struct iovec *iv;
|
|
|
|
struct iovec *iv2;
|
2011-12-01 18:46:28 +00:00
|
|
|
int error = 0, len, left, siz, tlen, ioflag = 0;
|
2009-05-04 15:23:58 +00:00
|
|
|
struct mbuf *m2 = NULL, *m3;
|
|
|
|
struct uio io, *uiop = &io;
|
|
|
|
struct nfsheur *nh;
|
|
|
|
|
|
|
|
len = left = NFSM_RNDUP(cnt);
|
|
|
|
m3 = NULL;
|
|
|
|
/*
|
|
|
|
* Generate the mbuf list with the uio_iov ref. to it.
|
|
|
|
*/
|
|
|
|
i = 0;
|
|
|
|
while (left > 0) {
|
|
|
|
NFSMGET(m);
|
2012-12-05 08:04:20 +00:00
|
|
|
MCLGET(m, M_WAITOK);
|
2009-05-04 15:23:58 +00:00
|
|
|
m->m_len = 0;
|
|
|
|
siz = min(M_TRAILINGSPACE(m), left);
|
|
|
|
left -= siz;
|
|
|
|
i++;
|
|
|
|
if (m3)
|
|
|
|
m2->m_next = m;
|
|
|
|
else
|
|
|
|
m3 = m;
|
|
|
|
m2 = m;
|
|
|
|
}
|
|
|
|
MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
|
|
|
|
M_TEMP, M_WAITOK);
|
|
|
|
uiop->uio_iov = iv2 = iv;
|
|
|
|
m = m3;
|
|
|
|
left = len;
|
|
|
|
i = 0;
|
|
|
|
while (left > 0) {
|
|
|
|
if (m == NULL)
|
|
|
|
panic("nfsvno_read iov");
|
|
|
|
siz = min(M_TRAILINGSPACE(m), left);
|
|
|
|
if (siz > 0) {
|
|
|
|
iv->iov_base = mtod(m, caddr_t) + m->m_len;
|
|
|
|
iv->iov_len = siz;
|
|
|
|
m->m_len += siz;
|
|
|
|
left -= siz;
|
|
|
|
iv++;
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
m = m->m_next;
|
|
|
|
}
|
|
|
|
uiop->uio_iovcnt = i;
|
|
|
|
uiop->uio_offset = off;
|
|
|
|
uiop->uio_resid = len;
|
|
|
|
uiop->uio_rw = UIO_READ;
|
|
|
|
uiop->uio_segflg = UIO_SYSSPACE;
|
2011-12-01 18:46:28 +00:00
|
|
|
nh = nfsrv_sequential_heuristic(uiop, vp);
|
|
|
|
ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
|
2009-05-04 15:23:58 +00:00
|
|
|
error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
|
|
|
|
FREE((caddr_t)iv2, M_TEMP);
|
|
|
|
if (error) {
|
|
|
|
m_freem(m3);
|
|
|
|
*mpp = NULL;
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
2011-12-01 18:46:28 +00:00
|
|
|
nh->nh_nextoff = uiop->uio_offset;
|
2009-05-04 15:23:58 +00:00
|
|
|
tlen = len - uiop->uio_resid;
|
|
|
|
cnt = cnt < tlen ? cnt : tlen;
|
|
|
|
tlen = NFSM_RNDUP(cnt);
|
|
|
|
if (tlen == 0) {
|
|
|
|
m_freem(m3);
|
|
|
|
m3 = NULL;
|
|
|
|
} else if (len != tlen || tlen != cnt)
|
|
|
|
nfsrv_adj(m3, len - tlen, tlen - cnt);
|
|
|
|
*mpp = m3;
|
|
|
|
*mpendp = m2;
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
|
|
|
return (error);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Write vnode op from an mbuf list.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable,
|
|
|
|
struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
|
|
|
|
{
|
|
|
|
struct iovec *ivp;
|
|
|
|
int i, len;
|
|
|
|
struct iovec *iv;
|
|
|
|
int ioflags, error;
|
|
|
|
struct uio io, *uiop = &io;
|
2011-12-01 18:46:28 +00:00
|
|
|
struct nfsheur *nh;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
|
|
|
|
M_WAITOK);
|
|
|
|
uiop->uio_iov = iv = ivp;
|
|
|
|
uiop->uio_iovcnt = cnt;
|
|
|
|
i = mtod(mp, caddr_t) + mp->m_len - cp;
|
|
|
|
len = retlen;
|
|
|
|
while (len > 0) {
|
|
|
|
if (mp == NULL)
|
|
|
|
panic("nfsvno_write");
|
|
|
|
if (i > 0) {
|
|
|
|
i = min(i, len);
|
|
|
|
ivp->iov_base = cp;
|
|
|
|
ivp->iov_len = i;
|
|
|
|
ivp++;
|
|
|
|
len -= i;
|
|
|
|
}
|
|
|
|
mp = mp->m_next;
|
|
|
|
if (mp) {
|
|
|
|
i = mp->m_len;
|
|
|
|
cp = mtod(mp, caddr_t);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (stable == NFSWRITE_UNSTABLE)
|
|
|
|
ioflags = IO_NODELOCKED;
|
|
|
|
else
|
|
|
|
ioflags = (IO_SYNC | IO_NODELOCKED);
|
|
|
|
uiop->uio_resid = retlen;
|
|
|
|
uiop->uio_rw = UIO_WRITE;
|
|
|
|
uiop->uio_segflg = UIO_SYSSPACE;
|
|
|
|
NFSUIOPROC(uiop, p);
|
|
|
|
uiop->uio_offset = off;
|
2011-12-01 18:46:28 +00:00
|
|
|
nh = nfsrv_sequential_heuristic(uiop, vp);
|
|
|
|
ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
|
2009-05-04 15:23:58 +00:00
|
|
|
error = VOP_WRITE(vp, uiop, ioflags, cred);
|
2011-12-01 18:46:28 +00:00
|
|
|
if (error == 0)
|
|
|
|
nh->nh_nextoff = uiop->uio_offset;
|
2009-05-04 15:23:58 +00:00
|
|
|
FREE((caddr_t)iv, M_TEMP);
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Common code for creating a regular file (plus special files for V2).
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
|
|
|
|
struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
|
2009-11-20 21:21:13 +00:00
|
|
|
int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
|
2009-05-04 15:23:58 +00:00
|
|
|
{
|
|
|
|
u_quad_t tempsize;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = nd->nd_repstat;
|
|
|
|
if (!error && ndp->ni_vp == NULL) {
|
|
|
|
if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
error = VOP_CREATE(ndp->ni_dvp,
|
|
|
|
&ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
if (!error) {
|
|
|
|
if (*exclusive_flagp) {
|
|
|
|
*exclusive_flagp = 0;
|
|
|
|
NFSVNO_ATTRINIT(nvap);
|
2009-11-20 21:21:13 +00:00
|
|
|
nvap->na_atime.tv_sec = cverf[0];
|
|
|
|
nvap->na_atime.tv_nsec = cverf[1];
|
2009-05-04 15:23:58 +00:00
|
|
|
error = VOP_SETATTR(ndp->ni_vp,
|
|
|
|
&nvap->na_vattr, nd->nd_cred);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* NFS V2 Only. nfsrvd_mknod() does this for V3.
|
|
|
|
* (This implies, just get out on an error.)
|
|
|
|
*/
|
|
|
|
} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
|
|
|
|
nvap->na_type == VFIFO) {
|
|
|
|
if (nvap->na_type == VCHR && rdev == 0xffffffff)
|
|
|
|
nvap->na_type = VFIFO;
|
|
|
|
if (nvap->na_type != VFIFO &&
|
|
|
|
(error = priv_check_cred(nd->nd_cred,
|
|
|
|
PRIV_VFS_MKNOD_DEV, 0))) {
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
vput(ndp->ni_dvp);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
nvap->na_rdev = rdev;
|
|
|
|
error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
|
|
|
|
&ndp->ni_cnd, &nvap->na_vattr);
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
2011-04-11 20:54:30 +00:00
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
if (error)
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
} else {
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
vput(ndp->ni_dvp);
|
2011-07-16 08:51:09 +00:00
|
|
|
error = ENXIO;
|
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
*vpp = ndp->ni_vp;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Handle cases where error is already set and/or
|
|
|
|
* the file exists.
|
|
|
|
* 1 - clean up the lookup
|
|
|
|
* 2 - iff !error and na_size set, truncate it
|
|
|
|
*/
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
*vpp = ndp->ni_vp;
|
|
|
|
if (ndp->ni_dvp == *vpp)
|
|
|
|
vrele(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
if (!error && nvap->na_size != VNOVAL) {
|
2009-12-25 20:44:19 +00:00
|
|
|
error = nfsvno_accchk(*vpp, VWRITE,
|
2009-05-04 15:23:58 +00:00
|
|
|
nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
|
2009-12-25 20:44:19 +00:00
|
|
|
NFSACCCHK_VPISLOCKED, NULL);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (!error) {
|
|
|
|
tempsize = nvap->na_size;
|
|
|
|
NFSVNO_ATTRINIT(nvap);
|
|
|
|
nvap->na_size = tempsize;
|
|
|
|
error = VOP_SETATTR(*vpp,
|
|
|
|
&nvap->na_vattr, nd->nd_cred);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (error)
|
|
|
|
vput(*vpp);
|
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Do a mknod vnode op.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
|
|
|
|
struct thread *p)
|
|
|
|
{
|
|
|
|
int error = 0;
|
|
|
|
enum vtype vtyp;
|
|
|
|
|
|
|
|
vtyp = nvap->na_type;
|
|
|
|
/*
|
|
|
|
* Iff doesn't exist, create it.
|
|
|
|
*/
|
|
|
|
if (ndp->ni_vp) {
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
vrele(ndp->ni_vp);
|
2011-07-16 08:51:09 +00:00
|
|
|
error = EEXIST;
|
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
vput(ndp->ni_dvp);
|
2011-07-16 08:51:09 +00:00
|
|
|
error = NFSERR_BADTYPE;
|
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
if (vtyp == VSOCK) {
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
|
|
|
|
&ndp->ni_cnd, &nvap->na_vattr);
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
} else {
|
|
|
|
if (nvap->na_type != VFIFO &&
|
|
|
|
(error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) {
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
vput(ndp->ni_dvp);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
|
|
|
|
&ndp->ni_cnd, &nvap->na_vattr);
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
2011-01-03 00:40:13 +00:00
|
|
|
vrele(ndp->ni_startdir);
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* Since VOP_MKNOD returns the ni_vp, I can't
|
|
|
|
* see any reason to do the lookup.
|
|
|
|
*/
|
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Mkdir vnode op.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
|
|
|
|
struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
|
|
|
|
{
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
if (ndp->ni_vp != NULL) {
|
|
|
|
if (ndp->ni_dvp == ndp->ni_vp)
|
|
|
|
vrele(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
vrele(ndp->ni_vp);
|
2010-04-02 02:19:28 +00:00
|
|
|
nfsvno_relpathbuf(ndp);
|
2011-07-16 08:51:09 +00:00
|
|
|
error = EEXIST;
|
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
|
|
|
|
&nvap->na_vattr);
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* symlink vnode op.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
|
|
|
|
int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
|
|
|
|
struct nfsexstuff *exp)
|
|
|
|
{
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
if (ndp->ni_vp) {
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
if (ndp->ni_dvp == ndp->ni_vp)
|
|
|
|
vrele(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
vrele(ndp->ni_vp);
|
2011-07-16 08:51:09 +00:00
|
|
|
error = EEXIST;
|
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
|
|
|
|
&nvap->na_vattr, pathcp);
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
/*
|
|
|
|
* Although FreeBSD still had the lookup code in
|
|
|
|
* it for 7/current, there doesn't seem to be any
|
|
|
|
* point, since VOP_SYMLINK() returns the ni_vp.
|
|
|
|
* Just vput it for v2.
|
|
|
|
*/
|
|
|
|
if (!not_v2 && !error)
|
|
|
|
vput(ndp->ni_vp);
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Parse symbolic link arguments.
|
|
|
|
* This function has an ugly side effect. It will MALLOC() an area for
|
|
|
|
* the symlink and set iov_base to point to it, only if it succeeds.
|
|
|
|
* So, if it returns with uiop->uio_iov->iov_base != NULL, that must
|
|
|
|
* be FREE'd later.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
|
|
|
|
struct thread *p, char **pathcpp, int *lenp)
|
|
|
|
{
|
|
|
|
u_int32_t *tl;
|
|
|
|
char *pathcp = NULL;
|
|
|
|
int error = 0, len;
|
|
|
|
struct nfsv2_sattr *sp;
|
|
|
|
|
|
|
|
*pathcpp = NULL;
|
|
|
|
*lenp = 0;
|
|
|
|
if ((nd->nd_flag & ND_NFSV3) &&
|
|
|
|
(error = nfsrv_sattr(nd, nvap, NULL, NULL, p)))
|
|
|
|
goto nfsmout;
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
len = fxdr_unsigned(int, *tl);
|
|
|
|
if (len > NFS_MAXPATHLEN || len <= 0) {
|
|
|
|
error = EBADRPC;
|
|
|
|
goto nfsmout;
|
|
|
|
}
|
|
|
|
MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK);
|
|
|
|
error = nfsrv_mtostr(nd, pathcp, len);
|
|
|
|
if (error)
|
|
|
|
goto nfsmout;
|
|
|
|
if (nd->nd_flag & ND_NFSV2) {
|
|
|
|
NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
|
|
|
|
nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
|
|
|
|
}
|
|
|
|
*pathcpp = pathcp;
|
|
|
|
*lenp = len;
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE2(0, nd);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (0);
|
|
|
|
nfsmout:
|
|
|
|
if (pathcp)
|
|
|
|
free(pathcp, M_TEMP);
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE2(error, nd);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remove a non-directory object.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
|
|
|
|
struct thread *p, struct nfsexstuff *exp)
|
|
|
|
{
|
|
|
|
struct vnode *vp;
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
vp = ndp->ni_vp;
|
|
|
|
if (vp->v_type == VDIR)
|
|
|
|
error = NFSERR_ISDIR;
|
|
|
|
else if (is_v4)
|
|
|
|
error = nfsrv_checkremove(vp, 1, p);
|
|
|
|
if (!error)
|
|
|
|
error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
|
|
|
|
if (ndp->ni_dvp == vp)
|
|
|
|
vrele(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
vput(vp);
|
2012-04-27 20:23:24 +00:00
|
|
|
if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
|
|
|
|
nfsvno_relpathbuf(ndp);
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remove a directory.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
|
|
|
|
struct thread *p, struct nfsexstuff *exp)
|
|
|
|
{
|
|
|
|
struct vnode *vp;
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
vp = ndp->ni_vp;
|
|
|
|
if (vp->v_type != VDIR) {
|
|
|
|
error = ENOTDIR;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* No rmdir "." please.
|
|
|
|
*/
|
|
|
|
if (ndp->ni_dvp == vp) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* The root of a mounted filesystem cannot be deleted.
|
|
|
|
*/
|
|
|
|
if (vp->v_vflag & VV_ROOT)
|
|
|
|
error = EBUSY;
|
|
|
|
out:
|
|
|
|
if (!error)
|
|
|
|
error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
|
|
|
|
if (ndp->ni_dvp == vp)
|
|
|
|
vrele(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
vput(vp);
|
2012-04-27 20:23:24 +00:00
|
|
|
if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
|
|
|
|
nfsvno_relpathbuf(ndp);
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Rename vnode op.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
|
|
|
|
u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
|
|
|
|
{
|
|
|
|
struct vnode *fvp, *tvp, *tdvp;
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
fvp = fromndp->ni_vp;
|
|
|
|
if (ndstat) {
|
|
|
|
vrele(fromndp->ni_dvp);
|
|
|
|
vrele(fvp);
|
|
|
|
error = ndstat;
|
|
|
|
goto out1;
|
|
|
|
}
|
|
|
|
tdvp = tondp->ni_dvp;
|
|
|
|
tvp = tondp->ni_vp;
|
|
|
|
if (tvp != NULL) {
|
|
|
|
if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
|
|
|
|
error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
|
|
|
|
goto out;
|
|
|
|
} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
|
|
|
|
error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (tvp->v_type == VDIR && tvp->v_mountedhere) {
|
|
|
|
error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A rename to '.' or '..' results in a prematurely
|
|
|
|
* unlocked vnode on FreeBSD5, so I'm just going to fail that
|
|
|
|
* here.
|
|
|
|
*/
|
|
|
|
if ((tondp->ni_cnd.cn_namelen == 1 &&
|
|
|
|
tondp->ni_cnd.cn_nameptr[0] == '.') ||
|
|
|
|
(tondp->ni_cnd.cn_namelen == 2 &&
|
|
|
|
tondp->ni_cnd.cn_nameptr[0] == '.' &&
|
|
|
|
tondp->ni_cnd.cn_nameptr[1] == '.')) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (fvp->v_type == VDIR && fvp->v_mountedhere) {
|
|
|
|
error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (fvp->v_mount != tdvp->v_mount) {
|
|
|
|
error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (fvp == tdvp) {
|
|
|
|
error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (fvp == tvp) {
|
|
|
|
/*
|
|
|
|
* If source and destination are the same, there is nothing to
|
|
|
|
* do. Set error to -1 to indicate this.
|
|
|
|
*/
|
|
|
|
error = -1;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (ndflag & ND_NFSV4) {
|
2011-07-16 08:05:31 +00:00
|
|
|
if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
|
2011-01-02 19:58:39 +00:00
|
|
|
error = nfsrv_checkremove(fvp, 0, p);
|
2011-07-16 08:05:36 +00:00
|
|
|
NFSVOPUNLOCK(fvp, 0);
|
2011-01-02 19:58:39 +00:00
|
|
|
} else
|
|
|
|
error = EPERM;
|
2009-05-04 15:23:58 +00:00
|
|
|
if (tvp && !error)
|
|
|
|
error = nfsrv_checkremove(tvp, 1, p);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* For NFSv2 and NFSv3, try to get rid of the delegation, so
|
|
|
|
* that the NFSv4 client won't be confused by the rename.
|
|
|
|
* Since nfsd_recalldelegation() can only be called on an
|
|
|
|
* unlocked vnode at this point and fvp is the file that will
|
|
|
|
* still exist after the rename, just do fvp.
|
|
|
|
*/
|
|
|
|
nfsd_recalldelegation(fvp, p);
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
if (!error) {
|
|
|
|
error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
|
|
|
|
&fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
|
|
|
|
&tondp->ni_cnd);
|
|
|
|
} else {
|
|
|
|
if (tdvp == tvp)
|
|
|
|
vrele(tdvp);
|
|
|
|
else
|
|
|
|
vput(tdvp);
|
|
|
|
if (tvp)
|
|
|
|
vput(tvp);
|
|
|
|
vrele(fromndp->ni_dvp);
|
|
|
|
vrele(fvp);
|
|
|
|
if (error == -1)
|
|
|
|
error = 0;
|
|
|
|
}
|
|
|
|
vrele(tondp->ni_startdir);
|
|
|
|
nfsvno_relpathbuf(tondp);
|
|
|
|
out1:
|
|
|
|
vrele(fromndp->ni_startdir);
|
|
|
|
nfsvno_relpathbuf(fromndp);
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Link vnode op.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
|
|
|
|
struct thread *p, struct nfsexstuff *exp)
|
|
|
|
{
|
|
|
|
struct vnode *xp;
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
xp = ndp->ni_vp;
|
|
|
|
if (xp != NULL) {
|
|
|
|
error = EEXIST;
|
|
|
|
} else {
|
|
|
|
xp = ndp->ni_dvp;
|
|
|
|
if (vp->v_mount != xp->v_mount)
|
|
|
|
error = EXDEV;
|
|
|
|
}
|
|
|
|
if (!error) {
|
2011-07-16 08:05:31 +00:00
|
|
|
NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
|
2011-01-02 19:58:39 +00:00
|
|
|
if ((vp->v_iflag & VI_DOOMED) == 0)
|
|
|
|
error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
|
|
|
|
else
|
|
|
|
error = EPERM;
|
2009-05-04 15:23:58 +00:00
|
|
|
if (ndp->ni_dvp == vp)
|
|
|
|
vrele(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vput(ndp->ni_dvp);
|
2011-07-16 08:05:36 +00:00
|
|
|
NFSVOPUNLOCK(vp, 0);
|
2009-05-04 15:23:58 +00:00
|
|
|
} else {
|
|
|
|
if (ndp->ni_dvp == ndp->ni_vp)
|
|
|
|
vrele(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
if (ndp->ni_vp)
|
|
|
|
vrele(ndp->ni_vp);
|
|
|
|
}
|
|
|
|
nfsvno_relpathbuf(ndp);
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Do the fsync() appropriate for the commit.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
|
|
|
|
struct thread *td)
|
|
|
|
{
|
|
|
|
int error = 0;
|
|
|
|
|
2011-12-16 00:58:41 +00:00
|
|
|
/*
|
|
|
|
* RFC 1813 3.3.21: if count is 0, a flush from offset to the end of
|
|
|
|
* file is done. At this time VOP_FSYNC does not accept offset and
|
|
|
|
* byte count parameters so call VOP_FSYNC the whole file for now.
|
|
|
|
* The same is true for NFSv4: RFC 3530 Sec. 14.2.3.
|
|
|
|
*/
|
|
|
|
if (cnt == 0 || cnt > MAX_COMMIT_COUNT) {
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* Give up and do the whole thing
|
|
|
|
*/
|
|
|
|
if (vp->v_object &&
|
|
|
|
(vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
|
2013-03-09 02:32:23 +00:00
|
|
|
VM_OBJECT_WLOCK(vp->v_object);
|
2009-05-04 15:23:58 +00:00
|
|
|
vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
|
2013-03-09 02:32:23 +00:00
|
|
|
VM_OBJECT_WUNLOCK(vp->v_object);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
error = VOP_FSYNC(vp, MNT_WAIT, td);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Locate and synchronously write any buffers that fall
|
|
|
|
* into the requested range. Note: we are assuming that
|
|
|
|
* f_iosize is a power of 2.
|
|
|
|
*/
|
|
|
|
int iosize = vp->v_mount->mnt_stat.f_iosize;
|
|
|
|
int iomask = iosize - 1;
|
|
|
|
struct bufobj *bo;
|
|
|
|
daddr_t lblkno;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Align to iosize boundry, super-align to page boundry.
|
|
|
|
*/
|
|
|
|
if (off & iomask) {
|
|
|
|
cnt += off & iomask;
|
|
|
|
off &= ~(u_quad_t)iomask;
|
|
|
|
}
|
|
|
|
if (off & PAGE_MASK) {
|
|
|
|
cnt += off & PAGE_MASK;
|
|
|
|
off &= ~(u_quad_t)PAGE_MASK;
|
|
|
|
}
|
|
|
|
lblkno = off / iosize;
|
|
|
|
|
|
|
|
if (vp->v_object &&
|
|
|
|
(vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
|
2013-03-09 02:32:23 +00:00
|
|
|
VM_OBJECT_WLOCK(vp->v_object);
|
2011-02-05 21:21:27 +00:00
|
|
|
vm_object_page_clean(vp->v_object, off, off + cnt,
|
|
|
|
OBJPC_SYNC);
|
2013-03-09 02:32:23 +00:00
|
|
|
VM_OBJECT_WUNLOCK(vp->v_object);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bo = &vp->v_bufobj;
|
|
|
|
BO_LOCK(bo);
|
|
|
|
while (cnt > 0) {
|
|
|
|
struct buf *bp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we have a buffer and it is marked B_DELWRI we
|
|
|
|
* have to lock and write it. Otherwise the prior
|
|
|
|
* write is assumed to have already been committed.
|
|
|
|
*
|
|
|
|
* gbincore() can return invalid buffers now so we
|
|
|
|
* have to check that bit as well (though B_DELWRI
|
|
|
|
* should not be set if B_INVAL is set there could be
|
|
|
|
* a race here since we haven't locked the buffer).
|
|
|
|
*/
|
|
|
|
if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
|
|
|
|
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
|
2013-05-31 00:43:41 +00:00
|
|
|
LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) {
|
2009-05-04 15:23:58 +00:00
|
|
|
BO_LOCK(bo);
|
|
|
|
continue; /* retry */
|
|
|
|
}
|
|
|
|
if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
|
|
|
|
B_DELWRI) {
|
|
|
|
bremfree(bp);
|
|
|
|
bp->b_flags &= ~B_ASYNC;
|
|
|
|
bwrite(bp);
|
|
|
|
++nfs_commit_miss;
|
|
|
|
} else
|
|
|
|
BUF_UNLOCK(bp);
|
|
|
|
BO_LOCK(bo);
|
|
|
|
}
|
|
|
|
++nfs_commit_blks;
|
|
|
|
if (cnt < iosize)
|
|
|
|
break;
|
|
|
|
cnt -= iosize;
|
|
|
|
++lblkno;
|
|
|
|
}
|
|
|
|
BO_UNLOCK(bo);
|
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Statfs vnode op.
|
|
|
|
*/
|
|
|
|
int
|
2009-05-11 15:33:26 +00:00
|
|
|
nfsvno_statfs(struct vnode *vp, struct statfs *sf)
|
2009-05-04 15:23:58 +00:00
|
|
|
{
|
2011-05-06 01:29:14 +00:00
|
|
|
int error;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
2011-05-06 01:29:14 +00:00
|
|
|
error = VFS_STATFS(vp->v_mount, sf);
|
|
|
|
if (error == 0) {
|
|
|
|
/*
|
|
|
|
* Since NFS handles these values as unsigned on the
|
|
|
|
* wire, there is no way to represent negative values,
|
|
|
|
* so set them to 0. Without this, they will appear
|
|
|
|
* to be very large positive values for clients like
|
|
|
|
* Solaris10.
|
|
|
|
*/
|
|
|
|
if (sf->f_bavail < 0)
|
|
|
|
sf->f_bavail = 0;
|
|
|
|
if (sf->f_ffree < 0)
|
|
|
|
sf->f_ffree = 0;
|
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE(error);
|
2011-05-06 01:29:14 +00:00
|
|
|
return (error);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
|
|
|
|
* must handle nfsrv_opencheck() calls after any other access checks.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
|
|
|
|
nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
|
2009-11-20 21:21:13 +00:00
|
|
|
int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
|
2009-05-04 15:23:58 +00:00
|
|
|
NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p,
|
|
|
|
struct nfsexstuff *exp, struct vnode **vpp)
|
|
|
|
{
|
|
|
|
struct vnode *vp = NULL;
|
|
|
|
u_quad_t tempsize;
|
|
|
|
struct nfsexstuff nes;
|
|
|
|
|
|
|
|
if (ndp->ni_vp == NULL)
|
|
|
|
nd->nd_repstat = nfsrv_opencheck(clientid,
|
|
|
|
stateidp, stp, NULL, nd, p, nd->nd_repstat);
|
|
|
|
if (!nd->nd_repstat) {
|
|
|
|
if (ndp->ni_vp == NULL) {
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
|
|
|
|
&ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
if (!nd->nd_repstat) {
|
|
|
|
if (*exclusive_flagp) {
|
|
|
|
*exclusive_flagp = 0;
|
|
|
|
NFSVNO_ATTRINIT(nvap);
|
2009-11-20 21:21:13 +00:00
|
|
|
nvap->na_atime.tv_sec = cverf[0];
|
|
|
|
nvap->na_atime.tv_nsec = cverf[1];
|
2009-05-04 15:23:58 +00:00
|
|
|
nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
|
|
|
|
&nvap->na_vattr, cred);
|
|
|
|
} else {
|
|
|
|
nfsrv_fixattr(nd, ndp->ni_vp, nvap,
|
|
|
|
aclp, p, attrbitp, exp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
vp = ndp->ni_vp;
|
|
|
|
} else {
|
|
|
|
if (ndp->ni_startdir)
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
vp = ndp->ni_vp;
|
|
|
|
if (create == NFSV4OPEN_CREATE) {
|
|
|
|
if (ndp->ni_dvp == vp)
|
|
|
|
vrele(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
}
|
|
|
|
if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
|
|
|
|
if (ndp->ni_cnd.cn_flags & RDONLY)
|
|
|
|
NFSVNO_SETEXRDONLY(&nes);
|
|
|
|
else
|
|
|
|
NFSVNO_EXINIT(&nes);
|
|
|
|
nd->nd_repstat = nfsvno_accchk(vp,
|
2009-12-25 20:44:19 +00:00
|
|
|
VWRITE, cred, &nes, p,
|
|
|
|
NFSACCCHK_NOOVERRIDE,
|
|
|
|
NFSACCCHK_VPISLOCKED, NULL);
|
2009-05-04 15:23:58 +00:00
|
|
|
nd->nd_repstat = nfsrv_opencheck(clientid,
|
|
|
|
stateidp, stp, vp, nd, p, nd->nd_repstat);
|
|
|
|
if (!nd->nd_repstat) {
|
|
|
|
tempsize = nvap->na_size;
|
|
|
|
NFSVNO_ATTRINIT(nvap);
|
|
|
|
nvap->na_size = tempsize;
|
|
|
|
nd->nd_repstat = VOP_SETATTR(vp,
|
|
|
|
&nvap->na_vattr, cred);
|
|
|
|
}
|
|
|
|
} else if (vp->v_type == VREG) {
|
|
|
|
nd->nd_repstat = nfsrv_opencheck(clientid,
|
|
|
|
stateidp, stp, vp, nd, p, nd->nd_repstat);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (ndp->ni_cnd.cn_flags & HASBUF)
|
|
|
|
nfsvno_relpathbuf(ndp);
|
|
|
|
if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
|
|
|
|
vrele(ndp->ni_startdir);
|
|
|
|
if (ndp->ni_dvp == ndp->ni_vp)
|
|
|
|
vrele(ndp->ni_dvp);
|
|
|
|
else
|
|
|
|
vput(ndp->ni_dvp);
|
|
|
|
if (ndp->ni_vp)
|
|
|
|
vput(ndp->ni_vp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*vpp = vp;
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
NFSEXITCODE2(0, nd);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Updates the file rev and sets the mtime and ctime
|
|
|
|
* to the current clock time, returning the va_filerev and va_Xtime
|
|
|
|
* values.
|
2013-12-25 01:03:14 +00:00
|
|
|
* Return ESTALE to indicate the vnode is VI_DOOMED.
|
2009-05-04 15:23:58 +00:00
|
|
|
*/
|
2013-12-25 01:03:14 +00:00
|
|
|
int
|
2009-05-04 15:23:58 +00:00
|
|
|
nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
|
|
|
|
struct ucred *cred, struct thread *p)
|
|
|
|
{
|
|
|
|
struct vattr va;
|
|
|
|
|
|
|
|
VATTR_NULL(&va);
|
2013-01-18 18:43:38 +00:00
|
|
|
vfs_timestamp(&va.va_mtime);
|
2013-12-25 01:03:14 +00:00
|
|
|
if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
|
|
|
|
NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
|
|
|
|
if ((vp->v_iflag & VI_DOOMED) != 0)
|
|
|
|
return (ESTALE);
|
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
(void) VOP_SETATTR(vp, &va, cred);
|
2010-12-24 21:31:18 +00:00
|
|
|
(void) nfsvno_getattr(vp, nvap, cred, p, 1);
|
2013-12-25 01:03:14 +00:00
|
|
|
return (0);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Glue routine to nfsv4_fillattr().
|
|
|
|
*/
|
|
|
|
int
|
2011-04-14 21:49:52 +00:00
|
|
|
nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
|
2009-05-04 15:23:58 +00:00
|
|
|
struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
|
2011-04-14 23:46:15 +00:00
|
|
|
struct ucred *cred, struct thread *p, int isdgram, int reterr,
|
|
|
|
int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
|
2009-05-04 15:23:58 +00:00
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
2011-04-14 21:49:52 +00:00
|
|
|
error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
|
2011-04-14 23:46:15 +00:00
|
|
|
attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
|
|
|
|
mounted_on_fileno);
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE2(0, nd);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Since the Readdir vnode ops vary, put the entire functions in here. */
|
|
|
|
/*
|
|
|
|
* nfs readdir service
|
|
|
|
* - mallocs what it thinks is enough to read
|
|
|
|
* count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
|
2010-04-04 23:19:11 +00:00
|
|
|
* - calls VOP_READDIR()
|
2009-05-04 15:23:58 +00:00
|
|
|
* - loops around building the reply
|
|
|
|
* if the output generated exceeds count break out of loop
|
|
|
|
* The NFSM_CLGET macro is used here so that the reply will be packed
|
|
|
|
* tightly in mbuf clusters.
|
|
|
|
* - it trims out records with d_fileno == 0
|
|
|
|
* this doesn't matter for Unix clients, but they might confuse clients
|
|
|
|
* for other os'.
|
|
|
|
* - it trims out records with d_type == DT_WHT
|
|
|
|
* these cannot be seen through NFS (unless we extend the protocol)
|
|
|
|
* The alternate call nfsrvd_readdirplus() does lookups as well.
|
|
|
|
* PS: The NFS protocol spec. does not clarify what the "count" byte
|
|
|
|
* argument is a count of.. just name strings and file id's or the
|
|
|
|
* entire reply rpc or ...
|
|
|
|
* I tried just file name and id sizes and it confused the Sun client,
|
|
|
|
* so I am using the full rpc size now. The "paranoia.." comment refers
|
|
|
|
* to including the status longwords that are not a part of the dir.
|
|
|
|
* "entry" structures, but are in the rpc.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
|
|
|
|
struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
|
|
|
|
{
|
|
|
|
struct dirent *dp;
|
|
|
|
u_int32_t *tl;
|
|
|
|
int dirlen;
|
|
|
|
char *cpos, *cend, *rbuf;
|
|
|
|
struct nfsvattr at;
|
|
|
|
int nlen, error = 0, getret = 1;
|
|
|
|
int siz, cnt, fullsiz, eofflag, ncookies;
|
|
|
|
u_int64_t off, toff, verf;
|
|
|
|
u_long *cookies = NULL, *cookiep;
|
|
|
|
struct uio io;
|
|
|
|
struct iovec iv;
|
2010-12-24 18:46:44 +00:00
|
|
|
int not_zfs;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
if (nd->nd_repstat) {
|
|
|
|
nfsrv_postopattr(nd, getret, &at);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
if (nd->nd_flag & ND_NFSV2) {
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
|
|
|
|
off = fxdr_unsigned(u_quad_t, *tl++);
|
|
|
|
} else {
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
|
|
|
|
off = fxdr_hyper(tl);
|
|
|
|
tl += 2;
|
|
|
|
verf = fxdr_hyper(tl);
|
|
|
|
tl += 2;
|
|
|
|
}
|
|
|
|
toff = off;
|
|
|
|
cnt = fxdr_unsigned(int, *tl);
|
2010-04-04 23:19:11 +00:00
|
|
|
if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
|
2009-05-04 15:23:58 +00:00
|
|
|
cnt = NFS_SRVMAXDATA(nd);
|
|
|
|
siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
|
|
|
|
fullsiz = siz;
|
|
|
|
if (nd->nd_flag & ND_NFSV3) {
|
|
|
|
nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred,
|
2010-12-24 21:31:18 +00:00
|
|
|
p, 1);
|
2009-05-04 15:23:58 +00:00
|
|
|
#if 0
|
|
|
|
/*
|
|
|
|
* va_filerev is not sufficient as a cookie verifier,
|
|
|
|
* since it is not supposed to change when entries are
|
|
|
|
* removed/added unless that offset cookies returned to
|
|
|
|
* the client are no longer valid.
|
|
|
|
*/
|
|
|
|
if (!nd->nd_repstat && toff && verf != at.na_filerev)
|
|
|
|
nd->nd_repstat = NFSERR_BAD_COOKIE;
|
|
|
|
#endif
|
|
|
|
}
|
2013-04-29 20:09:44 +00:00
|
|
|
if (!nd->nd_repstat && vp->v_type != VDIR)
|
|
|
|
nd->nd_repstat = NFSERR_NOTDIR;
|
2010-04-04 23:19:11 +00:00
|
|
|
if (nd->nd_repstat == 0 && cnt == 0) {
|
|
|
|
if (nd->nd_flag & ND_NFSV2)
|
|
|
|
/* NFSv2 does not have NFSERR_TOOSMALL */
|
|
|
|
nd->nd_repstat = EPERM;
|
|
|
|
else
|
|
|
|
nd->nd_repstat = NFSERR_TOOSMALL;
|
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
if (!nd->nd_repstat)
|
2009-12-25 20:44:19 +00:00
|
|
|
nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
|
2009-05-04 15:23:58 +00:00
|
|
|
nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
|
2009-12-25 20:44:19 +00:00
|
|
|
NFSACCCHK_VPISLOCKED, NULL);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (nd->nd_repstat) {
|
|
|
|
vput(vp);
|
|
|
|
if (nd->nd_flag & ND_NFSV3)
|
|
|
|
nfsrv_postopattr(nd, getret, &at);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
2010-12-24 18:46:44 +00:00
|
|
|
not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");
|
2009-05-04 15:23:58 +00:00
|
|
|
MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
|
|
|
|
again:
|
|
|
|
eofflag = 0;
|
|
|
|
if (cookies) {
|
|
|
|
free((caddr_t)cookies, M_TEMP);
|
|
|
|
cookies = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
iv.iov_base = rbuf;
|
|
|
|
iv.iov_len = siz;
|
|
|
|
io.uio_iov = &iv;
|
|
|
|
io.uio_iovcnt = 1;
|
|
|
|
io.uio_offset = (off_t)off;
|
|
|
|
io.uio_resid = siz;
|
|
|
|
io.uio_segflg = UIO_SYSSPACE;
|
|
|
|
io.uio_rw = UIO_READ;
|
|
|
|
io.uio_td = NULL;
|
|
|
|
nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
|
|
|
|
&cookies);
|
|
|
|
off = (u_int64_t)io.uio_offset;
|
|
|
|
if (io.uio_resid)
|
|
|
|
siz -= io.uio_resid;
|
|
|
|
|
|
|
|
if (!cookies && !nd->nd_repstat)
|
|
|
|
nd->nd_repstat = NFSERR_PERM;
|
|
|
|
if (nd->nd_flag & ND_NFSV3) {
|
2010-12-24 21:31:18 +00:00
|
|
|
getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (!nd->nd_repstat)
|
|
|
|
nd->nd_repstat = getret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Handles the failed cases. nd->nd_repstat == 0 past here.
|
|
|
|
*/
|
|
|
|
if (nd->nd_repstat) {
|
2010-12-24 20:24:07 +00:00
|
|
|
vput(vp);
|
2009-05-04 15:23:58 +00:00
|
|
|
free((caddr_t)rbuf, M_TEMP);
|
|
|
|
if (cookies)
|
|
|
|
free((caddr_t)cookies, M_TEMP);
|
|
|
|
if (nd->nd_flag & ND_NFSV3)
|
|
|
|
nfsrv_postopattr(nd, getret, &at);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If nothing read, return eof
|
|
|
|
* rpc reply
|
|
|
|
*/
|
|
|
|
if (siz == 0) {
|
2010-12-24 20:24:07 +00:00
|
|
|
vput(vp);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (nd->nd_flag & ND_NFSV2) {
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
|
|
|
|
} else {
|
|
|
|
nfsrv_postopattr(nd, getret, &at);
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
|
|
|
|
txdr_hyper(at.na_filerev, tl);
|
|
|
|
tl += 2;
|
|
|
|
}
|
|
|
|
*tl++ = newnfs_false;
|
|
|
|
*tl = newnfs_true;
|
|
|
|
FREE((caddr_t)rbuf, M_TEMP);
|
|
|
|
FREE((caddr_t)cookies, M_TEMP);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check for degenerate cases of nothing useful read.
|
|
|
|
* If so go try again
|
|
|
|
*/
|
|
|
|
cpos = rbuf;
|
|
|
|
cend = rbuf + siz;
|
|
|
|
dp = (struct dirent *)cpos;
|
|
|
|
cookiep = cookies;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For some reason FreeBSD's ufs_readdir() chooses to back the
|
|
|
|
* directory offset up to a block boundary, so it is necessary to
|
|
|
|
* skip over the records that precede the requested offset. This
|
|
|
|
* requires the assumption that file offset cookies monotonically
|
|
|
|
* increase.
|
2010-12-24 18:46:44 +00:00
|
|
|
* Since the offset cookies don't monotonically increase for ZFS,
|
|
|
|
* this is not done when ZFS is the file system.
|
2009-05-04 15:23:58 +00:00
|
|
|
*/
|
|
|
|
while (cpos < cend && ncookies > 0 &&
|
|
|
|
(dp->d_fileno == 0 || dp->d_type == DT_WHT ||
|
2010-12-24 18:46:44 +00:00
|
|
|
(not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff))) {
|
2009-05-04 15:23:58 +00:00
|
|
|
cpos += dp->d_reclen;
|
|
|
|
dp = (struct dirent *)cpos;
|
|
|
|
cookiep++;
|
|
|
|
ncookies--;
|
|
|
|
}
|
|
|
|
if (cpos >= cend || ncookies == 0) {
|
|
|
|
siz = fullsiz;
|
|
|
|
toff = off;
|
|
|
|
goto again;
|
|
|
|
}
|
2010-12-24 20:24:07 +00:00
|
|
|
vput(vp);
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* dirlen is the size of the reply, including all XDR and must
|
|
|
|
* not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
|
|
|
|
* if the XDR should be included in "count", but to be safe, we do.
|
|
|
|
* (Include the two booleans at the end of the reply in dirlen now.)
|
|
|
|
*/
|
|
|
|
if (nd->nd_flag & ND_NFSV3) {
|
|
|
|
nfsrv_postopattr(nd, getret, &at);
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
|
|
|
|
txdr_hyper(at.na_filerev, tl);
|
|
|
|
dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
|
|
|
|
} else {
|
|
|
|
dirlen = 2 * NFSX_UNSIGNED;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Loop through the records and build reply */
|
|
|
|
while (cpos < cend && ncookies > 0) {
|
|
|
|
nlen = dp->d_namlen;
|
|
|
|
if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
|
|
|
|
nlen <= NFS_MAXNAMLEN) {
|
|
|
|
if (nd->nd_flag & ND_NFSV3)
|
|
|
|
dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
|
|
|
|
else
|
|
|
|
dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
|
|
|
|
if (dirlen > cnt) {
|
|
|
|
eofflag = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Build the directory record xdr from
|
|
|
|
* the dirent entry.
|
|
|
|
*/
|
|
|
|
if (nd->nd_flag & ND_NFSV3) {
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
|
|
|
|
*tl++ = newnfs_true;
|
|
|
|
*tl++ = 0;
|
|
|
|
} else {
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
|
|
|
|
*tl++ = newnfs_true;
|
|
|
|
}
|
|
|
|
*tl = txdr_unsigned(dp->d_fileno);
|
|
|
|
(void) nfsm_strtom(nd, dp->d_name, nlen);
|
|
|
|
if (nd->nd_flag & ND_NFSV3) {
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
|
|
|
|
*tl++ = 0;
|
|
|
|
} else
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
*tl = txdr_unsigned(*cookiep);
|
|
|
|
}
|
|
|
|
cpos += dp->d_reclen;
|
|
|
|
dp = (struct dirent *)cpos;
|
|
|
|
cookiep++;
|
|
|
|
ncookies--;
|
|
|
|
}
|
|
|
|
if (cpos < cend)
|
|
|
|
eofflag = 0;
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
|
|
|
|
*tl++ = newnfs_false;
|
|
|
|
if (eofflag)
|
|
|
|
*tl = newnfs_true;
|
|
|
|
else
|
|
|
|
*tl = newnfs_false;
|
|
|
|
FREE((caddr_t)rbuf, M_TEMP);
|
|
|
|
FREE((caddr_t)cookies, M_TEMP);
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE2(0, nd);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (0);
|
|
|
|
nfsmout:
|
|
|
|
vput(vp);
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE2(error, nd);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Readdirplus for V3 and Readdir for V4.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
|
|
|
|
struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
|
|
|
|
{
|
|
|
|
struct dirent *dp;
|
|
|
|
u_int32_t *tl;
|
|
|
|
int dirlen;
|
|
|
|
char *cpos, *cend, *rbuf;
|
|
|
|
struct vnode *nvp;
|
|
|
|
fhandle_t nfh;
|
|
|
|
struct nfsvattr nva, at, *nvap = &nva;
|
|
|
|
struct mbuf *mb0, *mb1;
|
|
|
|
struct nfsreferral *refp;
|
2009-11-23 16:08:15 +00:00
|
|
|
int nlen, r, error = 0, getret = 1, usevget = 1;
|
2009-05-04 15:23:58 +00:00
|
|
|
int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
|
|
|
|
caddr_t bpos0, bpos1;
|
|
|
|
u_int64_t off, toff, verf;
|
|
|
|
u_long *cookies = NULL, *cookiep;
|
|
|
|
nfsattrbit_t attrbits, rderrbits, savbits;
|
|
|
|
struct uio io;
|
|
|
|
struct iovec iv;
|
2009-11-23 16:08:15 +00:00
|
|
|
struct componentname cn;
|
2011-04-14 23:46:15 +00:00
|
|
|
int at_root, needs_unbusy, not_zfs, supports_nfsv4acls;
|
2011-04-14 21:49:52 +00:00
|
|
|
struct mount *mp, *new_mp;
|
|
|
|
uint64_t mounted_on_fileno;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
if (nd->nd_repstat) {
|
|
|
|
nfsrv_postopattr(nd, getret, &at);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
|
|
|
|
off = fxdr_hyper(tl);
|
|
|
|
toff = off;
|
|
|
|
tl += 2;
|
|
|
|
verf = fxdr_hyper(tl);
|
|
|
|
tl += 2;
|
|
|
|
siz = fxdr_unsigned(int, *tl++);
|
|
|
|
cnt = fxdr_unsigned(int, *tl);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the server's maximum data transfer size as the upper bound
|
|
|
|
* on reply datalen.
|
|
|
|
*/
|
2010-04-04 23:19:11 +00:00
|
|
|
if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
|
2009-05-04 15:23:58 +00:00
|
|
|
cnt = NFS_SRVMAXDATA(nd);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* siz is a "hint" of how much directory information (name, fileid,
|
|
|
|
* cookie) should be in the reply. At least one client "hints" 0,
|
|
|
|
* so I set it to cnt for that case. I also round it up to the
|
|
|
|
* next multiple of DIRBLKSIZ.
|
|
|
|
*/
|
2010-04-04 23:19:11 +00:00
|
|
|
if (siz <= 0)
|
2009-05-04 15:23:58 +00:00
|
|
|
siz = cnt;
|
|
|
|
siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
|
|
|
|
|
|
|
|
if (nd->nd_flag & ND_NFSV4) {
|
|
|
|
error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
|
|
|
|
if (error)
|
|
|
|
goto nfsmout;
|
|
|
|
NFSSET_ATTRBIT(&savbits, &attrbits);
|
|
|
|
NFSCLRNOTFILLABLE_ATTRBIT(&attrbits);
|
|
|
|
NFSZERO_ATTRBIT(&rderrbits);
|
|
|
|
NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
|
|
|
|
} else {
|
|
|
|
NFSZERO_ATTRBIT(&attrbits);
|
|
|
|
}
|
|
|
|
fullsiz = siz;
|
2010-12-24 21:31:18 +00:00
|
|
|
nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (!nd->nd_repstat) {
|
|
|
|
if (off && verf != at.na_filerev) {
|
|
|
|
/*
|
|
|
|
* va_filerev is not sufficient as a cookie verifier,
|
|
|
|
* since it is not supposed to change when entries are
|
|
|
|
* removed/added unless that offset cookies returned to
|
|
|
|
* the client are no longer valid.
|
|
|
|
*/
|
|
|
|
#if 0
|
|
|
|
if (nd->nd_flag & ND_NFSV4) {
|
|
|
|
nd->nd_repstat = NFSERR_NOTSAME;
|
|
|
|
} else {
|
|
|
|
nd->nd_repstat = NFSERR_BAD_COOKIE;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
} else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) {
|
|
|
|
nd->nd_repstat = NFSERR_BAD_COOKIE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!nd->nd_repstat && vp->v_type != VDIR)
|
|
|
|
nd->nd_repstat = NFSERR_NOTDIR;
|
|
|
|
if (!nd->nd_repstat && cnt == 0)
|
|
|
|
nd->nd_repstat = NFSERR_TOOSMALL;
|
|
|
|
if (!nd->nd_repstat)
|
2009-12-25 20:44:19 +00:00
|
|
|
nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
|
2009-05-04 15:23:58 +00:00
|
|
|
nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
|
2009-12-25 20:44:19 +00:00
|
|
|
NFSACCCHK_VPISLOCKED, NULL);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (nd->nd_repstat) {
|
|
|
|
vput(vp);
|
|
|
|
if (nd->nd_flag & ND_NFSV3)
|
|
|
|
nfsrv_postopattr(nd, getret, &at);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
2010-12-24 18:46:44 +00:00
|
|
|
not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
|
|
|
|
again:
|
|
|
|
eofflag = 0;
|
|
|
|
if (cookies) {
|
|
|
|
free((caddr_t)cookies, M_TEMP);
|
|
|
|
cookies = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
iv.iov_base = rbuf;
|
|
|
|
iv.iov_len = siz;
|
|
|
|
io.uio_iov = &iv;
|
|
|
|
io.uio_iovcnt = 1;
|
|
|
|
io.uio_offset = (off_t)off;
|
|
|
|
io.uio_resid = siz;
|
|
|
|
io.uio_segflg = UIO_SYSSPACE;
|
|
|
|
io.uio_rw = UIO_READ;
|
|
|
|
io.uio_td = NULL;
|
|
|
|
nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
|
|
|
|
&cookies);
|
|
|
|
off = (u_int64_t)io.uio_offset;
|
|
|
|
if (io.uio_resid)
|
|
|
|
siz -= io.uio_resid;
|
|
|
|
|
2010-12-24 21:31:18 +00:00
|
|
|
getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
if (!cookies && !nd->nd_repstat)
|
|
|
|
nd->nd_repstat = NFSERR_PERM;
|
|
|
|
if (!nd->nd_repstat)
|
|
|
|
nd->nd_repstat = getret;
|
|
|
|
if (nd->nd_repstat) {
|
2009-11-23 16:08:15 +00:00
|
|
|
vput(vp);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (cookies)
|
|
|
|
free((caddr_t)cookies, M_TEMP);
|
|
|
|
free((caddr_t)rbuf, M_TEMP);
|
|
|
|
if (nd->nd_flag & ND_NFSV3)
|
|
|
|
nfsrv_postopattr(nd, getret, &at);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If nothing read, return eof
|
|
|
|
* rpc reply
|
|
|
|
*/
|
|
|
|
if (siz == 0) {
|
2009-11-23 16:08:15 +00:00
|
|
|
vput(vp);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (nd->nd_flag & ND_NFSV3)
|
|
|
|
nfsrv_postopattr(nd, getret, &at);
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
|
|
|
|
txdr_hyper(at.na_filerev, tl);
|
|
|
|
tl += 2;
|
|
|
|
*tl++ = newnfs_false;
|
|
|
|
*tl = newnfs_true;
|
|
|
|
free((caddr_t)cookies, M_TEMP);
|
|
|
|
free((caddr_t)rbuf, M_TEMP);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check for degenerate cases of nothing useful read.
|
|
|
|
* If so go try again
|
|
|
|
*/
|
|
|
|
cpos = rbuf;
|
|
|
|
cend = rbuf + siz;
|
|
|
|
dp = (struct dirent *)cpos;
|
|
|
|
cookiep = cookies;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For some reason FreeBSD's ufs_readdir() chooses to back the
|
|
|
|
* directory offset up to a block boundary, so it is necessary to
|
|
|
|
* skip over the records that precede the requested offset. This
|
|
|
|
* requires the assumption that file offset cookies monotonically
|
|
|
|
* increase.
|
2010-12-24 18:46:44 +00:00
|
|
|
* Since the offset cookies don't monotonically increase for ZFS,
|
|
|
|
* this is not done when ZFS is the file system.
|
2009-05-04 15:23:58 +00:00
|
|
|
*/
|
|
|
|
while (cpos < cend && ncookies > 0 &&
|
|
|
|
(dp->d_fileno == 0 || dp->d_type == DT_WHT ||
|
2010-12-24 18:46:44 +00:00
|
|
|
(not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff) ||
|
2009-05-04 15:23:58 +00:00
|
|
|
((nd->nd_flag & ND_NFSV4) &&
|
|
|
|
((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
|
|
|
|
(dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
|
|
|
|
cpos += dp->d_reclen;
|
|
|
|
dp = (struct dirent *)cpos;
|
|
|
|
cookiep++;
|
|
|
|
ncookies--;
|
|
|
|
}
|
|
|
|
if (cpos >= cend || ncookies == 0) {
|
|
|
|
siz = fullsiz;
|
|
|
|
toff = off;
|
|
|
|
goto again;
|
|
|
|
}
|
2011-01-09 02:10:54 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Busy the file system so that the mount point won't go away
|
|
|
|
* and, as such, VFS_VGET() can be used safely.
|
|
|
|
*/
|
|
|
|
mp = vp->v_mount;
|
|
|
|
vfs_ref(mp);
|
2011-07-16 08:05:36 +00:00
|
|
|
NFSVOPUNLOCK(vp, 0);
|
2011-01-09 02:10:54 +00:00
|
|
|
nd->nd_repstat = vfs_busy(mp, 0);
|
|
|
|
vfs_rel(mp);
|
|
|
|
if (nd->nd_repstat != 0) {
|
|
|
|
vrele(vp);
|
|
|
|
free(cookies, M_TEMP);
|
|
|
|
free(rbuf, M_TEMP);
|
|
|
|
if (nd->nd_flag & ND_NFSV3)
|
|
|
|
nfsrv_postopattr(nd, getret, &at);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2011-01-09 02:10:54 +00:00
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
|
2013-12-24 22:24:17 +00:00
|
|
|
/*
|
|
|
|
* Check to see if entries in this directory can be safely acquired
|
|
|
|
* via VFS_VGET() or if a switch to VOP_LOOKUP() is required.
|
|
|
|
* ZFS snapshot directories need VOP_LOOKUP(), so that any
|
|
|
|
* automount of the snapshot directory that is required will
|
|
|
|
* be done.
|
|
|
|
* This needs to be done here for NFSv4, since NFSv4 never does
|
|
|
|
* a VFS_VGET() for "." or "..".
|
|
|
|
*/
|
|
|
|
if (not_zfs == 0) {
|
|
|
|
r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp);
|
|
|
|
if (r == EOPNOTSUPP) {
|
|
|
|
usevget = 0;
|
|
|
|
cn.cn_nameiop = LOOKUP;
|
|
|
|
cn.cn_lkflags = LK_SHARED | LK_RETRY;
|
|
|
|
cn.cn_cred = nd->nd_cred;
|
|
|
|
cn.cn_thread = p;
|
|
|
|
} else if (r == 0)
|
|
|
|
vput(nvp);
|
|
|
|
}
|
|
|
|
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* Save this position, in case there is an error before one entry
|
|
|
|
* is created.
|
|
|
|
*/
|
|
|
|
mb0 = nd->nd_mb;
|
|
|
|
bpos0 = nd->nd_bpos;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Fill in the first part of the reply.
|
|
|
|
* dirlen is the reply length in bytes and cannot exceed cnt.
|
|
|
|
* (Include the two booleans at the end of the reply in dirlen now,
|
|
|
|
* so we recognize when we have exceeded cnt.)
|
|
|
|
*/
|
|
|
|
if (nd->nd_flag & ND_NFSV3) {
|
|
|
|
dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
|
|
|
|
nfsrv_postopattr(nd, getret, &at);
|
|
|
|
} else {
|
|
|
|
dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
|
|
|
|
}
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
|
|
|
|
txdr_hyper(at.na_filerev, tl);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Save this position, in case there is an empty reply needed.
|
|
|
|
*/
|
|
|
|
mb1 = nd->nd_mb;
|
|
|
|
bpos1 = nd->nd_bpos;
|
|
|
|
|
|
|
|
/* Loop through the records and build reply */
|
|
|
|
entrycnt = 0;
|
|
|
|
while (cpos < cend && ncookies > 0 && dirlen < cnt) {
|
|
|
|
nlen = dp->d_namlen;
|
|
|
|
if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
|
|
|
|
nlen <= NFS_MAXNAMLEN &&
|
|
|
|
((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
|
|
|
|
(nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
|
|
|
|
|| (nlen == 1 && dp->d_name[0] != '.'))) {
|
|
|
|
/*
|
|
|
|
* Save the current position in the reply, in case
|
|
|
|
* this entry exceeds cnt.
|
|
|
|
*/
|
|
|
|
mb1 = nd->nd_mb;
|
|
|
|
bpos1 = nd->nd_bpos;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For readdir_and_lookup get the vnode using
|
|
|
|
* the file number.
|
|
|
|
*/
|
|
|
|
nvp = NULL;
|
|
|
|
refp = NULL;
|
|
|
|
r = 0;
|
2011-04-14 21:49:52 +00:00
|
|
|
at_root = 0;
|
|
|
|
needs_unbusy = 0;
|
|
|
|
new_mp = mp;
|
|
|
|
mounted_on_fileno = (uint64_t)dp->d_fileno;
|
2009-05-04 15:23:58 +00:00
|
|
|
if ((nd->nd_flag & ND_NFSV3) ||
|
|
|
|
NFSNONZERO_ATTRBIT(&savbits)) {
|
|
|
|
if (nd->nd_flag & ND_NFSV4)
|
|
|
|
refp = nfsv4root_getreferral(NULL,
|
|
|
|
vp, dp->d_fileno);
|
2009-11-23 16:08:15 +00:00
|
|
|
if (refp == NULL) {
|
|
|
|
if (usevget)
|
2011-01-09 02:10:54 +00:00
|
|
|
r = VFS_VGET(mp, dp->d_fileno,
|
|
|
|
LK_SHARED, &nvp);
|
2009-11-23 16:08:15 +00:00
|
|
|
else
|
|
|
|
r = EOPNOTSUPP;
|
|
|
|
if (r == EOPNOTSUPP) {
|
|
|
|
if (usevget) {
|
|
|
|
usevget = 0;
|
|
|
|
cn.cn_nameiop = LOOKUP;
|
|
|
|
cn.cn_lkflags =
|
2010-12-25 21:56:25 +00:00
|
|
|
LK_SHARED |
|
2009-11-23 16:08:15 +00:00
|
|
|
LK_RETRY;
|
|
|
|
cn.cn_cred =
|
|
|
|
nd->nd_cred;
|
|
|
|
cn.cn_thread = p;
|
|
|
|
}
|
|
|
|
cn.cn_nameptr = dp->d_name;
|
|
|
|
cn.cn_namelen = nlen;
|
|
|
|
cn.cn_flags = ISLASTCN |
|
2012-10-22 17:50:54 +00:00
|
|
|
NOFOLLOW | LOCKLEAF;
|
2009-11-23 16:08:15 +00:00
|
|
|
if (nlen == 2 &&
|
|
|
|
dp->d_name[0] == '.' &&
|
|
|
|
dp->d_name[1] == '.')
|
|
|
|
cn.cn_flags |=
|
|
|
|
ISDOTDOT;
|
2011-07-16 08:05:31 +00:00
|
|
|
if (NFSVOPLOCK(vp, LK_SHARED)
|
2010-12-24 20:24:07 +00:00
|
|
|
!= 0) {
|
|
|
|
nd->nd_repstat = EPERM;
|
|
|
|
break;
|
|
|
|
}
|
2010-10-21 18:49:12 +00:00
|
|
|
if ((vp->v_vflag & VV_ROOT) != 0
|
|
|
|
&& (cn.cn_flags & ISDOTDOT)
|
|
|
|
!= 0) {
|
|
|
|
vref(vp);
|
|
|
|
nvp = vp;
|
|
|
|
r = 0;
|
2011-04-09 23:55:27 +00:00
|
|
|
} else {
|
2010-10-21 18:49:12 +00:00
|
|
|
r = VOP_LOOKUP(vp, &nvp,
|
|
|
|
&cn);
|
2011-04-09 23:55:27 +00:00
|
|
|
if (vp != nvp)
|
2011-07-16 08:05:36 +00:00
|
|
|
NFSVOPUNLOCK(vp,
|
2011-04-09 23:55:27 +00:00
|
|
|
0);
|
|
|
|
}
|
2009-11-23 16:08:15 +00:00
|
|
|
}
|
2011-04-14 21:49:52 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* For NFSv4, check to see if nvp is
|
|
|
|
* a mount point and get the mount
|
|
|
|
* point vnode, as required.
|
|
|
|
*/
|
|
|
|
if (r == 0 &&
|
|
|
|
nfsrv_enable_crossmntpt != 0 &&
|
|
|
|
(nd->nd_flag & ND_NFSV4) != 0 &&
|
|
|
|
nvp->v_type == VDIR &&
|
|
|
|
nvp->v_mountedhere != NULL) {
|
|
|
|
new_mp = nvp->v_mountedhere;
|
|
|
|
r = vfs_busy(new_mp, 0);
|
|
|
|
vput(nvp);
|
|
|
|
nvp = NULL;
|
|
|
|
if (r == 0) {
|
|
|
|
r = VFS_ROOT(new_mp,
|
|
|
|
LK_SHARED, &nvp);
|
|
|
|
needs_unbusy = 1;
|
|
|
|
if (r == 0)
|
|
|
|
at_root = 1;
|
|
|
|
}
|
|
|
|
}
|
2009-11-23 16:08:15 +00:00
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
if (!r) {
|
|
|
|
if (refp == NULL &&
|
|
|
|
((nd->nd_flag & ND_NFSV3) ||
|
|
|
|
NFSNONZERO_ATTRBIT(&attrbits))) {
|
|
|
|
r = nfsvno_getfh(nvp, &nfh, p);
|
|
|
|
if (!r)
|
|
|
|
r = nfsvno_getattr(nvp, nvap,
|
2010-12-24 21:31:18 +00:00
|
|
|
nd->nd_cred, p, 1);
|
2013-12-24 22:24:17 +00:00
|
|
|
if (r == 0 && not_zfs == 0 &&
|
|
|
|
nfsrv_enable_crossmntpt != 0 &&
|
|
|
|
(nd->nd_flag & ND_NFSV4) != 0 &&
|
|
|
|
nvp->v_type == VDIR &&
|
|
|
|
vp->v_mount != nvp->v_mount) {
|
|
|
|
/*
|
|
|
|
* For a ZFS snapshot, there is a
|
|
|
|
* pseudo mount that does not set
|
|
|
|
* v_mountedhere, so it needs to
|
|
|
|
* be detected via a different
|
|
|
|
* mount structure.
|
|
|
|
*/
|
|
|
|
at_root = 1;
|
|
|
|
if (new_mp == mp)
|
|
|
|
new_mp = nvp->v_mount;
|
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
nvp = NULL;
|
|
|
|
}
|
|
|
|
if (r) {
|
|
|
|
if (!NFSISSET_ATTRBIT(&attrbits,
|
|
|
|
NFSATTRBIT_RDATTRERROR)) {
|
|
|
|
if (nvp != NULL)
|
|
|
|
vput(nvp);
|
2011-04-14 21:49:52 +00:00
|
|
|
if (needs_unbusy != 0)
|
|
|
|
vfs_unbusy(new_mp);
|
2009-05-04 15:23:58 +00:00
|
|
|
nd->nd_repstat = r;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Build the directory record xdr
|
|
|
|
*/
|
|
|
|
if (nd->nd_flag & ND_NFSV3) {
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
|
|
|
|
*tl++ = newnfs_true;
|
|
|
|
*tl++ = 0;
|
|
|
|
*tl = txdr_unsigned(dp->d_fileno);
|
|
|
|
dirlen += nfsm_strtom(nd, dp->d_name, nlen);
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
|
|
|
|
*tl++ = 0;
|
|
|
|
*tl = txdr_unsigned(*cookiep);
|
|
|
|
nfsrv_postopattr(nd, 0, nvap);
|
|
|
|
dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
|
|
|
|
dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
|
|
|
|
if (nvp != NULL)
|
|
|
|
vput(nvp);
|
|
|
|
} else {
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
|
|
|
|
*tl++ = newnfs_true;
|
|
|
|
*tl++ = 0;
|
|
|
|
*tl = txdr_unsigned(*cookiep);
|
|
|
|
dirlen += nfsm_strtom(nd, dp->d_name, nlen);
|
2011-04-14 23:46:15 +00:00
|
|
|
if (nvp != NULL) {
|
|
|
|
supports_nfsv4acls =
|
|
|
|
nfs_supportsnfsv4acls(nvp);
|
2011-07-16 08:05:36 +00:00
|
|
|
NFSVOPUNLOCK(nvp, 0);
|
2011-04-14 23:46:15 +00:00
|
|
|
} else
|
|
|
|
supports_nfsv4acls = 0;
|
2009-05-04 15:23:58 +00:00
|
|
|
if (refp != NULL) {
|
|
|
|
dirlen += nfsrv_putreferralattr(nd,
|
|
|
|
&savbits, refp, 0,
|
|
|
|
&nd->nd_repstat);
|
|
|
|
if (nd->nd_repstat) {
|
|
|
|
if (nvp != NULL)
|
|
|
|
vrele(nvp);
|
2011-04-14 21:49:52 +00:00
|
|
|
if (needs_unbusy != 0)
|
|
|
|
vfs_unbusy(new_mp);
|
2009-05-04 15:23:58 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else if (r) {
|
2011-04-14 21:49:52 +00:00
|
|
|
dirlen += nfsvno_fillattr(nd, new_mp,
|
|
|
|
nvp, nvap, &nfh, r, &rderrbits,
|
2011-04-14 23:46:15 +00:00
|
|
|
nd->nd_cred, p, isdgram, 0,
|
|
|
|
supports_nfsv4acls, at_root,
|
2011-04-14 21:49:52 +00:00
|
|
|
mounted_on_fileno);
|
2009-05-04 15:23:58 +00:00
|
|
|
} else {
|
2011-04-14 21:49:52 +00:00
|
|
|
dirlen += nfsvno_fillattr(nd, new_mp,
|
|
|
|
nvp, nvap, &nfh, r, &attrbits,
|
2011-04-14 23:46:15 +00:00
|
|
|
nd->nd_cred, p, isdgram, 0,
|
|
|
|
supports_nfsv4acls, at_root,
|
2011-04-14 21:49:52 +00:00
|
|
|
mounted_on_fileno);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
if (nvp != NULL)
|
|
|
|
vrele(nvp);
|
|
|
|
dirlen += (3 * NFSX_UNSIGNED);
|
|
|
|
}
|
2011-04-14 21:49:52 +00:00
|
|
|
if (needs_unbusy != 0)
|
|
|
|
vfs_unbusy(new_mp);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (dirlen <= cnt)
|
|
|
|
entrycnt++;
|
|
|
|
}
|
|
|
|
cpos += dp->d_reclen;
|
|
|
|
dp = (struct dirent *)cpos;
|
|
|
|
cookiep++;
|
|
|
|
ncookies--;
|
|
|
|
}
|
2010-12-24 20:24:07 +00:00
|
|
|
vrele(vp);
|
2011-01-09 02:10:54 +00:00
|
|
|
vfs_unbusy(mp);
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If dirlen > cnt, we must strip off the last entry. If that
|
|
|
|
* results in an empty reply, report NFSERR_TOOSMALL.
|
|
|
|
*/
|
|
|
|
if (dirlen > cnt || nd->nd_repstat) {
|
|
|
|
if (!nd->nd_repstat && entrycnt == 0)
|
|
|
|
nd->nd_repstat = NFSERR_TOOSMALL;
|
|
|
|
if (nd->nd_repstat)
|
|
|
|
newnfs_trimtrailing(nd, mb0, bpos0);
|
|
|
|
else
|
|
|
|
newnfs_trimtrailing(nd, mb1, bpos1);
|
|
|
|
eofflag = 0;
|
|
|
|
} else if (cpos < cend)
|
|
|
|
eofflag = 0;
|
|
|
|
if (!nd->nd_repstat) {
|
|
|
|
NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
|
|
|
|
*tl++ = newnfs_false;
|
|
|
|
if (eofflag)
|
|
|
|
*tl = newnfs_true;
|
|
|
|
else
|
|
|
|
*tl = newnfs_false;
|
|
|
|
}
|
|
|
|
FREE((caddr_t)cookies, M_TEMP);
|
|
|
|
FREE((caddr_t)rbuf, M_TEMP);
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE2(0, nd);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (0);
|
|
|
|
nfsmout:
|
|
|
|
vput(vp);
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE2(error, nd);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the settable attributes out of the mbuf list.
|
|
|
|
* (Return 0 or EBADRPC)
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsrv_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
|
|
|
|
nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
|
|
|
|
{
|
|
|
|
u_int32_t *tl;
|
|
|
|
struct nfsv2_sattr *sp;
|
|
|
|
int error = 0, toclient = 0;
|
|
|
|
|
|
|
|
switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
|
|
|
|
case ND_NFSV2:
|
|
|
|
NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
|
|
|
|
/*
|
|
|
|
* Some old clients didn't fill in the high order 16bits.
|
|
|
|
* --> check the low order 2 bytes for 0xffff
|
|
|
|
*/
|
|
|
|
if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
|
|
|
|
nvap->na_mode = nfstov_mode(sp->sa_mode);
|
|
|
|
if (sp->sa_uid != newnfs_xdrneg1)
|
|
|
|
nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
|
|
|
|
if (sp->sa_gid != newnfs_xdrneg1)
|
|
|
|
nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
|
|
|
|
if (sp->sa_size != newnfs_xdrneg1)
|
|
|
|
nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
|
|
|
|
if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
|
|
|
|
#ifdef notyet
|
|
|
|
fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
|
|
|
|
#else
|
|
|
|
nvap->na_atime.tv_sec =
|
|
|
|
fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
|
|
|
|
nvap->na_atime.tv_nsec = 0;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
|
|
|
|
fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
|
|
|
|
break;
|
|
|
|
case ND_NFSV3:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
if (*tl == newnfs_true) {
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
nvap->na_mode = nfstov_mode(*tl);
|
|
|
|
}
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
if (*tl == newnfs_true) {
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
nvap->na_uid = fxdr_unsigned(uid_t, *tl);
|
|
|
|
}
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
if (*tl == newnfs_true) {
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
nvap->na_gid = fxdr_unsigned(gid_t, *tl);
|
|
|
|
}
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
if (*tl == newnfs_true) {
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
|
|
|
|
nvap->na_size = fxdr_hyper(tl);
|
|
|
|
}
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
switch (fxdr_unsigned(int, *tl)) {
|
|
|
|
case NFSV3SATTRTIME_TOCLIENT:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
|
|
|
|
fxdr_nfsv3time(tl, &nvap->na_atime);
|
|
|
|
toclient = 1;
|
|
|
|
break;
|
|
|
|
case NFSV3SATTRTIME_TOSERVER:
|
2013-01-18 18:43:38 +00:00
|
|
|
vfs_timestamp(&nvap->na_atime);
|
2009-05-04 15:23:58 +00:00
|
|
|
nvap->na_vaflags |= VA_UTIMES_NULL;
|
|
|
|
break;
|
|
|
|
};
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
switch (fxdr_unsigned(int, *tl)) {
|
|
|
|
case NFSV3SATTRTIME_TOCLIENT:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
|
|
|
|
fxdr_nfsv3time(tl, &nvap->na_mtime);
|
|
|
|
nvap->na_vaflags &= ~VA_UTIMES_NULL;
|
|
|
|
break;
|
|
|
|
case NFSV3SATTRTIME_TOSERVER:
|
2013-01-18 18:43:38 +00:00
|
|
|
vfs_timestamp(&nvap->na_mtime);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (!toclient)
|
|
|
|
nvap->na_vaflags |= VA_UTIMES_NULL;
|
|
|
|
break;
|
|
|
|
};
|
|
|
|
break;
|
|
|
|
case ND_NFSV4:
|
|
|
|
error = nfsv4_sattr(nd, nvap, attrbitp, aclp, p);
|
|
|
|
};
|
|
|
|
nfsmout:
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE2(error, nd);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Handle the setable attributes for V4.
|
|
|
|
* Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsv4_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
|
|
|
|
nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
|
|
|
|
{
|
|
|
|
u_int32_t *tl;
|
|
|
|
int attrsum = 0;
|
|
|
|
int i, j;
|
|
|
|
int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
|
|
|
|
int toclient = 0;
|
|
|
|
u_char *cp, namestr[NFSV4_SMALLSTR + 1];
|
|
|
|
uid_t uid;
|
|
|
|
gid_t gid;
|
|
|
|
|
|
|
|
error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
|
|
|
|
if (error)
|
2011-07-16 08:51:09 +00:00
|
|
|
goto nfsmout;
|
2009-05-04 15:23:58 +00:00
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
attrsize = fxdr_unsigned(int, *tl);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Loop around getting the setable attributes. If an unsupported
|
|
|
|
* one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
|
|
|
|
*/
|
|
|
|
if (retnotsup) {
|
|
|
|
nd->nd_repstat = NFSERR_ATTRNOTSUPP;
|
|
|
|
bitpos = NFSATTRBIT_MAX;
|
|
|
|
} else {
|
|
|
|
bitpos = 0;
|
|
|
|
}
|
|
|
|
for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
|
|
|
|
if (attrsum > attrsize) {
|
|
|
|
error = NFSERR_BADXDR;
|
|
|
|
goto nfsmout;
|
|
|
|
}
|
|
|
|
if (NFSISSET_ATTRBIT(attrbitp, bitpos))
|
|
|
|
switch (bitpos) {
|
|
|
|
case NFSATTRBIT_SIZE:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
|
|
|
|
nvap->na_size = fxdr_hyper(tl);
|
|
|
|
attrsum += NFSX_HYPER;
|
|
|
|
break;
|
|
|
|
case NFSATTRBIT_ACL:
|
|
|
|
error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize,
|
|
|
|
p);
|
|
|
|
if (error)
|
|
|
|
goto nfsmout;
|
|
|
|
if (aceerr && !nd->nd_repstat)
|
2011-07-16 08:04:57 +00:00
|
|
|
nd->nd_repstat = aceerr;
|
2009-05-04 15:23:58 +00:00
|
|
|
attrsum += aclsize;
|
|
|
|
break;
|
|
|
|
case NFSATTRBIT_ARCHIVE:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
if (!nd->nd_repstat)
|
|
|
|
nd->nd_repstat = NFSERR_ATTRNOTSUPP;
|
|
|
|
attrsum += NFSX_UNSIGNED;
|
|
|
|
break;
|
|
|
|
case NFSATTRBIT_HIDDEN:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
if (!nd->nd_repstat)
|
|
|
|
nd->nd_repstat = NFSERR_ATTRNOTSUPP;
|
|
|
|
attrsum += NFSX_UNSIGNED;
|
|
|
|
break;
|
|
|
|
case NFSATTRBIT_MIMETYPE:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
i = fxdr_unsigned(int, *tl);
|
|
|
|
error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
|
|
|
|
if (error)
|
|
|
|
goto nfsmout;
|
|
|
|
if (!nd->nd_repstat)
|
|
|
|
nd->nd_repstat = NFSERR_ATTRNOTSUPP;
|
|
|
|
attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
|
|
|
|
break;
|
|
|
|
case NFSATTRBIT_MODE:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
nvap->na_mode = nfstov_mode(*tl);
|
|
|
|
attrsum += NFSX_UNSIGNED;
|
|
|
|
break;
|
|
|
|
case NFSATTRBIT_OWNER:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
j = fxdr_unsigned(int, *tl);
|
2011-07-16 08:51:09 +00:00
|
|
|
if (j < 0) {
|
|
|
|
error = NFSERR_BADXDR;
|
|
|
|
goto nfsmout;
|
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
if (j > NFSV4_SMALLSTR)
|
|
|
|
cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
|
|
|
|
else
|
|
|
|
cp = namestr;
|
|
|
|
error = nfsrv_mtostr(nd, cp, j);
|
|
|
|
if (error) {
|
|
|
|
if (j > NFSV4_SMALLSTR)
|
|
|
|
free(cp, M_NFSSTRING);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto nfsmout;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
if (!nd->nd_repstat) {
|
2012-09-20 02:49:25 +00:00
|
|
|
nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid,
|
|
|
|
p);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (!nd->nd_repstat)
|
|
|
|
nvap->na_uid = uid;
|
|
|
|
}
|
|
|
|
if (j > NFSV4_SMALLSTR)
|
|
|
|
free(cp, M_NFSSTRING);
|
|
|
|
attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
|
|
|
|
break;
|
|
|
|
case NFSATTRBIT_OWNERGROUP:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
j = fxdr_unsigned(int, *tl);
|
2011-07-16 08:51:09 +00:00
|
|
|
if (j < 0) {
|
|
|
|
error = NFSERR_BADXDR;
|
|
|
|
goto nfsmout;
|
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
if (j > NFSV4_SMALLSTR)
|
|
|
|
cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
|
|
|
|
else
|
|
|
|
cp = namestr;
|
|
|
|
error = nfsrv_mtostr(nd, cp, j);
|
|
|
|
if (error) {
|
|
|
|
if (j > NFSV4_SMALLSTR)
|
|
|
|
free(cp, M_NFSSTRING);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto nfsmout;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
if (!nd->nd_repstat) {
|
2012-09-20 02:49:25 +00:00
|
|
|
nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid,
|
|
|
|
p);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (!nd->nd_repstat)
|
|
|
|
nvap->na_gid = gid;
|
|
|
|
}
|
|
|
|
if (j > NFSV4_SMALLSTR)
|
|
|
|
free(cp, M_NFSSTRING);
|
|
|
|
attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
|
|
|
|
break;
|
|
|
|
case NFSATTRBIT_SYSTEM:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
if (!nd->nd_repstat)
|
|
|
|
nd->nd_repstat = NFSERR_ATTRNOTSUPP;
|
|
|
|
attrsum += NFSX_UNSIGNED;
|
|
|
|
break;
|
|
|
|
case NFSATTRBIT_TIMEACCESSSET:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
attrsum += NFSX_UNSIGNED;
|
|
|
|
if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
|
|
|
|
fxdr_nfsv4time(tl, &nvap->na_atime);
|
|
|
|
toclient = 1;
|
|
|
|
attrsum += NFSX_V4TIME;
|
|
|
|
} else {
|
2013-01-18 18:43:38 +00:00
|
|
|
vfs_timestamp(&nvap->na_atime);
|
2009-05-04 15:23:58 +00:00
|
|
|
nvap->na_vaflags |= VA_UTIMES_NULL;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case NFSATTRBIT_TIMEBACKUP:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
|
|
|
|
if (!nd->nd_repstat)
|
|
|
|
nd->nd_repstat = NFSERR_ATTRNOTSUPP;
|
|
|
|
attrsum += NFSX_V4TIME;
|
|
|
|
break;
|
|
|
|
case NFSATTRBIT_TIMECREATE:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
|
|
|
|
if (!nd->nd_repstat)
|
|
|
|
nd->nd_repstat = NFSERR_ATTRNOTSUPP;
|
|
|
|
attrsum += NFSX_V4TIME;
|
|
|
|
break;
|
|
|
|
case NFSATTRBIT_TIMEMODIFYSET:
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
attrsum += NFSX_UNSIGNED;
|
|
|
|
if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
|
|
|
|
fxdr_nfsv4time(tl, &nvap->na_mtime);
|
|
|
|
nvap->na_vaflags &= ~VA_UTIMES_NULL;
|
|
|
|
attrsum += NFSX_V4TIME;
|
|
|
|
} else {
|
2013-01-18 18:43:38 +00:00
|
|
|
vfs_timestamp(&nvap->na_mtime);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (!toclient)
|
|
|
|
nvap->na_vaflags |= VA_UTIMES_NULL;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
nd->nd_repstat = NFSERR_ATTRNOTSUPP;
|
|
|
|
/*
|
|
|
|
* set bitpos so we drop out of the loop.
|
|
|
|
*/
|
|
|
|
bitpos = NFSATTRBIT_MAX;
|
|
|
|
break;
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* some clients pad the attrlist, so we need to skip over the
|
|
|
|
* padding.
|
|
|
|
*/
|
|
|
|
if (attrsum > attrsize) {
|
|
|
|
error = NFSERR_BADXDR;
|
|
|
|
} else {
|
|
|
|
attrsize = NFSM_RNDUP(attrsize);
|
|
|
|
if (attrsum < attrsize)
|
|
|
|
error = nfsm_advance(nd, attrsize - attrsum, -1);
|
|
|
|
}
|
|
|
|
nfsmout:
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE2(error, nd);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check/setup export credentials.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
|
|
|
|
struct ucred *credanon)
|
|
|
|
{
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check/setup credentials.
|
|
|
|
*/
|
|
|
|
if (nd->nd_flag & ND_GSS)
|
2009-05-14 21:39:08 +00:00
|
|
|
exp->nes_exflag &= ~MNT_EXPORTANON;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
/*
|
2009-05-14 21:39:08 +00:00
|
|
|
* Check to see if the operation is allowed for this security flavor.
|
2009-05-04 15:23:58 +00:00
|
|
|
* RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to
|
|
|
|
* AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
|
2009-05-14 21:39:08 +00:00
|
|
|
* Also, allow Secinfo, so that it can acquire the correct flavor(s).
|
2009-05-04 15:23:58 +00:00
|
|
|
*/
|
2009-05-14 21:39:08 +00:00
|
|
|
if (nfsvno_testexp(nd, exp) &&
|
|
|
|
nd->nd_procnum != NFSV4OP_SECINFO &&
|
2009-05-04 15:23:58 +00:00
|
|
|
nd->nd_procnum != NFSPROC_FSINFO) {
|
|
|
|
if (nd->nd_flag & ND_NFSV4)
|
|
|
|
error = NFSERR_WRONGSEC;
|
|
|
|
else
|
|
|
|
error = (NFSERR_AUTHERR | AUTH_TOOWEAK);
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check to see if the file system is exported V4 only.
|
|
|
|
*/
|
2011-07-16 08:51:09 +00:00
|
|
|
if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) {
|
|
|
|
error = NFSERR_PROGNOTV4;
|
|
|
|
goto out;
|
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Now, map the user credentials.
|
|
|
|
* (Note that ND_AUTHNONE will only be set for an NFSv3
|
|
|
|
* Fsinfo RPC. If set for anything else, this code might need
|
|
|
|
* to change.)
|
|
|
|
*/
|
|
|
|
if (NFSVNO_EXPORTED(exp) &&
|
|
|
|
((!(nd->nd_flag & ND_GSS) && nd->nd_cred->cr_uid == 0) ||
|
|
|
|
NFSVNO_EXPORTANON(exp) ||
|
|
|
|
(nd->nd_flag & ND_AUTHNONE))) {
|
|
|
|
nd->nd_cred->cr_uid = credanon->cr_uid;
|
|
|
|
nd->nd_cred->cr_gid = credanon->cr_gid;
|
2009-06-19 17:10:35 +00:00
|
|
|
crsetgroups(nd->nd_cred, credanon->cr_ngroups,
|
|
|
|
credanon->cr_groups);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE2(error, nd);
|
|
|
|
return (error);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check exports.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
|
|
|
|
struct ucred **credp)
|
|
|
|
{
|
2009-05-14 21:39:08 +00:00
|
|
|
int i, error, *secflavors;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
|
2009-05-14 21:39:08 +00:00
|
|
|
&exp->nes_numsecflavor, &secflavors);
|
|
|
|
if (error) {
|
|
|
|
if (nfs_rootfhset) {
|
|
|
|
exp->nes_exflag = 0;
|
|
|
|
exp->nes_numsecflavor = 0;
|
|
|
|
error = 0;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* Copy the security flavors. */
|
|
|
|
for (i = 0; i < exp->nes_numsecflavor; i++)
|
|
|
|
exp->nes_secflavors[i] = secflavors[i];
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get a vnode for a file handle and export stuff.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
|
2010-12-25 21:56:25 +00:00
|
|
|
int lktype, struct vnode **vpp, struct nfsexstuff *exp,
|
|
|
|
struct ucred **credp)
|
2009-05-04 15:23:58 +00:00
|
|
|
{
|
2009-05-14 21:39:08 +00:00
|
|
|
int i, error, *secflavors;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
2009-05-11 18:45:04 +00:00
|
|
|
*credp = NULL;
|
2009-05-14 21:39:08 +00:00
|
|
|
exp->nes_numsecflavor = 0;
|
2012-10-22 17:50:54 +00:00
|
|
|
error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp);
|
2010-03-26 01:35:19 +00:00
|
|
|
if (error != 0)
|
|
|
|
/* Make sure the server replies ESTALE to the client. */
|
|
|
|
error = ESTALE;
|
2009-05-04 15:23:58 +00:00
|
|
|
if (nam && !error) {
|
|
|
|
error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
|
2009-05-14 21:39:08 +00:00
|
|
|
&exp->nes_numsecflavor, &secflavors);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (error) {
|
|
|
|
if (nfs_rootfhset) {
|
|
|
|
exp->nes_exflag = 0;
|
2009-05-14 21:39:08 +00:00
|
|
|
exp->nes_numsecflavor = 0;
|
2009-05-04 15:23:58 +00:00
|
|
|
error = 0;
|
|
|
|
} else {
|
|
|
|
vput(*vpp);
|
|
|
|
}
|
2009-05-14 21:39:08 +00:00
|
|
|
} else {
|
|
|
|
/* Copy the security flavors. */
|
|
|
|
for (i = 0; i < exp->nes_numsecflavor; i++)
|
|
|
|
exp->nes_secflavors[i] = secflavors[i];
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* nfsd_fhtovp() - convert a fh to a vnode ptr
|
|
|
|
* - look up fsid in mount list (if not found ret error)
|
|
|
|
* - get vp and export rights by calling nfsvno_fhtovp()
|
|
|
|
* - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
|
|
|
|
* for AUTH_SYS
|
2011-01-06 19:50:11 +00:00
|
|
|
* - if mpp != NULL, return the mount point so that it can
|
|
|
|
* be used for vn_finished_write() by the caller
|
2009-05-04 15:23:58 +00:00
|
|
|
*/
|
|
|
|
void
|
2010-12-25 21:56:25 +00:00
|
|
|
nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
|
2009-05-04 15:23:58 +00:00
|
|
|
struct vnode **vpp, struct nfsexstuff *exp,
|
|
|
|
struct mount **mpp, int startwrite, struct thread *p)
|
|
|
|
{
|
|
|
|
struct mount *mp;
|
|
|
|
struct ucred *credanon;
|
|
|
|
fhandle_t *fhp;
|
|
|
|
|
|
|
|
fhp = (fhandle_t *)nfp->nfsrvfh_data;
|
|
|
|
/*
|
|
|
|
* Check for the special case of the nfsv4root_fh.
|
|
|
|
*/
|
2011-01-05 18:46:05 +00:00
|
|
|
mp = vfs_busyfs(&fhp->fh_fsid);
|
2011-01-06 19:50:11 +00:00
|
|
|
if (mpp != NULL)
|
|
|
|
*mpp = mp;
|
2011-01-05 18:46:05 +00:00
|
|
|
if (mp == NULL) {
|
2009-05-04 15:23:58 +00:00
|
|
|
*vpp = NULL;
|
|
|
|
nd->nd_repstat = ESTALE;
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
Revamp the old NFS server's File Handle Affinity (FHA) code so that
it will work with either the old or new server.
The FHA code keeps a cache of currently active file handles for
NFSv2 and v3 requests, so that read and write requests for the same
file are directed to the same group of threads (reads) or thread
(writes). It does not currently work for NFSv4 requests. They are
more complex, and will take more work to support.
This improves read-ahead performance, especially with ZFS, if the
FHA tuning parameters are configured appropriately. Without the
FHA code, concurrent reads that are part of a sequential read from
a file will be directed to separate NFS threads. This has the
effect of confusing the ZFS zfetch (prefetch) code and makes
sequential reads significantly slower with clients like Linux that
do a lot of prefetching.
The FHA code has also been updated to direct write requests to nearby
file offsets to the same thread in the same way it batches reads,
and the FHA code will now also send writes to multiple threads when
needed.
This improves sequential write performance in ZFS, because writes
to a file are now more ordered. Since NFS writes (generally
less than 64K) are smaller than the typical ZFS record size
(usually 128K), out of order NFS writes to the same block can
trigger a read in ZFS. Sending them down the same thread increases
the odds of their being in order.
In order for multiple write threads per file in the FHA code to be
useful, writes in the NFS server have been changed to use a LK_SHARED
vnode lock, and upgrade that to LK_EXCLUSIVE if the filesystem
doesn't allow multiple writers to a file at once. ZFS is currently
the only filesystem that allows multiple writers to a file, because
it has internal file range locking. This change does not affect the
NFSv4 code.
This improves random write performance to a single file in ZFS, since
we can now have multiple writers inside ZFS at one time.
I have changed the default tuning parameters to a 22 bit (4MB)
window size (from 256K) and unlimited commands per thread as a
result of my benchmarking with ZFS.
The FHA code has been updated to allow configuring the tuning
parameters from loader tunable variables in addition to sysctl
variables. The read offset window calculation has been slightly
modified as well. Instead of having separate bins, each file
handle has a rolling window of bin_shift size. This minimizes
glitches in throughput when shifting from one bin to another.
sys/conf/files:
Add nfs_fha_new.c and nfs_fha_old.c. Compile nfs_fha.c
when either the old or the new NFS server is built.
sys/fs/nfs/nfsport.h,
sys/fs/nfs/nfs_commonport.c:
Bring in changes from Rick Macklem to newnfs_realign that
allow it to operate in blocking (M_WAITOK) or non-blocking
(M_NOWAIT) mode.
sys/fs/nfs/nfs_commonsubs.c,
sys/fs/nfs/nfs_var.h:
Bring in a change from Rick Macklem to allow telling
nfsm_dissect() whether or not to wait for mallocs.
sys/fs/nfs/nfsm_subs.h:
Bring in changes from Rick Macklem to create a new
nfsm_dissect_nonblock() inline function and
NFSM_DISSECT_NONBLOCK() macro.
sys/fs/nfs/nfs_commonkrpc.c,
sys/fs/nfsclient/nfs_clkrpc.c:
Add the malloc wait flag to a newnfs_realign() call.
sys/fs/nfsserver/nfs_nfsdkrpc.c:
Setup the new NFS server's RPC thread pool so that it will
call the FHA code.
Add the malloc flag argument to newnfs_realign().
Unstaticize newnfs_nfsv3_procid[] so that we can use it in
the FHA code.
sys/fs/nfsserver/nfs_nfsdsocket.c:
In nfsrvd_dorpc(), add NFSPROC_WRITE to the list of RPC types
that use the LK_SHARED lock type.
sys/fs/nfsserver/nfs_nfsdport.c:
In nfsd_fhtovp(), if we're starting a write, check to see
whether the underlying filesystem supports shared writes.
If not, upgrade the lock type from LK_SHARED to LK_EXCLUSIVE.
sys/nfsserver/nfs_fha.c:
Remove all code that is specific to the NFS server
implementation. Anything that is server-specific is now
accessed through a callback supplied by that server's FHA
shim in the new softc.
There are now separate sysctls and tunables for the FHA
implementations for the old and new NFS servers. The new
NFS server has its tunables under vfs.nfsd.fha, the old
NFS server's tunables are under vfs.nfsrv.fha as before.
In fha_extract_info(), use callouts for all server-specific
code. Getting file handles and offsets is now done in the
individual server's shim module.
In fha_hash_entry_choose_thread(), change the way we decide
whether two reads are in proximity to each other.
Previously, the calculation was a simple shift operation to
see whether the offsets were in the same power of 2 bucket.
The issue was that there would be a bucket (and therefore
thread) transition, even if the reads were in close
proximity. When there is a thread transition, reads wind
up going somewhat out of order, and ZFS gets confused.
The new calculation simply tries to see whether the offsets
are within 1 << bin_shift of each other. If they are, the
reads will be sent to the same thread.
The effect of this change is that for sequential reads, if
the client doesn't exceed the max_reqs_per_nfsd parameter
and the bin_shift is set to a reasonable value (22, or
4MB works well in my tests), the reads in any sequential
stream will largely be confined to a single thread.
Change fha_assign() so that it takes a softc argument. It
is now called from the individual server's shim code, which
will pass in the softc.
Change fhe_stats_sysctl() so that it takes a softc
parameter. It is now called from the individual server's
shim code. Add the current offset to the list of things
printed out about each active thread.
Change the num_reads and num_writes counters in the
fha_hash_entry structure to 32-bit values, and rename them
num_rw and num_exclusive, respectively, to reflect their
changed usage.
Add an enable sysctl and tunable that allows the user to
disable the FHA code (when vfs.XXX.fha.enable = 0). This
is useful for before/after performance comparisons.
nfs_fha.h:
Move most structure definitions out of nfs_fha.c and into
the header file, so that the individual server shims can
see them.
Change the default bin_shift to 22 (4MB) instead of 18
(256K). Allow unlimited commands per thread.
sys/nfsserver/nfs_fha_old.c,
sys/nfsserver/nfs_fha_old.h,
sys/fs/nfsserver/nfs_fha_new.c,
sys/fs/nfsserver/nfs_fha_new.h:
Add shims for the old and new NFS servers to interface with
the FHA code, and callbacks for the
The shims contain all of the code and definitions that are
specific to the NFS servers.
They setup the server-specific callbacks and set the server
name for the sysctl and loader tunable variables.
sys/nfsserver/nfs_srvkrpc.c:
Configure the RPC code to call fhaold_assign() instead of
fha_assign().
sys/modules/nfsd/Makefile:
Add nfs_fha.c and nfs_fha_new.c.
sys/modules/nfsserver/Makefile:
Add nfs_fha_old.c.
Reviewed by: rmacklem
Sponsored by: Spectra Logic
MFC after: 2 weeks
2013-04-17 21:00:22 +00:00
|
|
|
if (startwrite) {
|
2009-05-04 15:23:58 +00:00
|
|
|
vn_start_write(NULL, mpp, V_WAIT);
|
Revamp the old NFS server's File Handle Affinity (FHA) code so that
it will work with either the old or new server.
The FHA code keeps a cache of currently active file handles for
NFSv2 and v3 requests, so that read and write requests for the same
file are directed to the same group of threads (reads) or thread
(writes). It does not currently work for NFSv4 requests. They are
more complex, and will take more work to support.
This improves read-ahead performance, especially with ZFS, if the
FHA tuning parameters are configured appropriately. Without the
FHA code, concurrent reads that are part of a sequential read from
a file will be directed to separate NFS threads. This has the
effect of confusing the ZFS zfetch (prefetch) code and makes
sequential reads significantly slower with clients like Linux that
do a lot of prefetching.
The FHA code has also been updated to direct write requests to nearby
file offsets to the same thread in the same way it batches reads,
and the FHA code will now also send writes to multiple threads when
needed.
This improves sequential write performance in ZFS, because writes
to a file are now more ordered. Since NFS writes (generally
less than 64K) are smaller than the typical ZFS record size
(usually 128K), out of order NFS writes to the same block can
trigger a read in ZFS. Sending them down the same thread increases
the odds of their being in order.
In order for multiple write threads per file in the FHA code to be
useful, writes in the NFS server have been changed to use a LK_SHARED
vnode lock, and upgrade that to LK_EXCLUSIVE if the filesystem
doesn't allow multiple writers to a file at once. ZFS is currently
the only filesystem that allows multiple writers to a file, because
it has internal file range locking. This change does not affect the
NFSv4 code.
This improves random write performance to a single file in ZFS, since
we can now have multiple writers inside ZFS at one time.
I have changed the default tuning parameters to a 22 bit (4MB)
window size (from 256K) and unlimited commands per thread as a
result of my benchmarking with ZFS.
The FHA code has been updated to allow configuring the tuning
parameters from loader tunable variables in addition to sysctl
variables. The read offset window calculation has been slightly
modified as well. Instead of having separate bins, each file
handle has a rolling window of bin_shift size. This minimizes
glitches in throughput when shifting from one bin to another.
sys/conf/files:
Add nfs_fha_new.c and nfs_fha_old.c. Compile nfs_fha.c
when either the old or the new NFS server is built.
sys/fs/nfs/nfsport.h,
sys/fs/nfs/nfs_commonport.c:
Bring in changes from Rick Macklem to newnfs_realign that
allow it to operate in blocking (M_WAITOK) or non-blocking
(M_NOWAIT) mode.
sys/fs/nfs/nfs_commonsubs.c,
sys/fs/nfs/nfs_var.h:
Bring in a change from Rick Macklem to allow telling
nfsm_dissect() whether or not to wait for mallocs.
sys/fs/nfs/nfsm_subs.h:
Bring in changes from Rick Macklem to create a new
nfsm_dissect_nonblock() inline function and
NFSM_DISSECT_NONBLOCK() macro.
sys/fs/nfs/nfs_commonkrpc.c,
sys/fs/nfsclient/nfs_clkrpc.c:
Add the malloc wait flag to a newnfs_realign() call.
sys/fs/nfsserver/nfs_nfsdkrpc.c:
Setup the new NFS server's RPC thread pool so that it will
call the FHA code.
Add the malloc flag argument to newnfs_realign().
Unstaticize newnfs_nfsv3_procid[] so that we can use it in
the FHA code.
sys/fs/nfsserver/nfs_nfsdsocket.c:
In nfsrvd_dorpc(), add NFSPROC_WRITE to the list of RPC types
that use the LK_SHARED lock type.
sys/fs/nfsserver/nfs_nfsdport.c:
In nfsd_fhtovp(), if we're starting a write, check to see
whether the underlying filesystem supports shared writes.
If not, upgrade the lock type from LK_SHARED to LK_EXCLUSIVE.
sys/nfsserver/nfs_fha.c:
Remove all code that is specific to the NFS server
implementation. Anything that is server-specific is now
accessed through a callback supplied by that server's FHA
shim in the new softc.
There are now separate sysctls and tunables for the FHA
implementations for the old and new NFS servers. The new
NFS server has its tunables under vfs.nfsd.fha, the old
NFS server's tunables are under vfs.nfsrv.fha as before.
In fha_extract_info(), use callouts for all server-specific
code. Getting file handles and offsets is now done in the
individual server's shim module.
In fha_hash_entry_choose_thread(), change the way we decide
whether two reads are in proximity to each other.
Previously, the calculation was a simple shift operation to
see whether the offsets were in the same power of 2 bucket.
The issue was that there would be a bucket (and therefore
thread) transition, even if the reads were in close
proximity. When there is a thread transition, reads wind
up going somewhat out of order, and ZFS gets confused.
The new calculation simply tries to see whether the offsets
are within 1 << bin_shift of each other. If they are, the
reads will be sent to the same thread.
The effect of this change is that for sequential reads, if
the client doesn't exceed the max_reqs_per_nfsd parameter
and the bin_shift is set to a reasonable value (22, or
4MB works well in my tests), the reads in any sequential
stream will largely be confined to a single thread.
Change fha_assign() so that it takes a softc argument. It
is now called from the individual server's shim code, which
will pass in the softc.
Change fhe_stats_sysctl() so that it takes a softc
parameter. It is now called from the individual server's
shim code. Add the current offset to the list of things
printed out about each active thread.
Change the num_reads and num_writes counters in the
fha_hash_entry structure to 32-bit values, and rename them
num_rw and num_exclusive, respectively, to reflect their
changed usage.
Add an enable sysctl and tunable that allows the user to
disable the FHA code (when vfs.XXX.fha.enable = 0). This
is useful for before/after performance comparisons.
nfs_fha.h:
Move most structure definitions out of nfs_fha.c and into
the header file, so that the individual server shims can
see them.
Change the default bin_shift to 22 (4MB) instead of 18
(256K). Allow unlimited commands per thread.
sys/nfsserver/nfs_fha_old.c,
sys/nfsserver/nfs_fha_old.h,
sys/fs/nfsserver/nfs_fha_new.c,
sys/fs/nfsserver/nfs_fha_new.h:
Add shims for the old and new NFS servers to interface with
the FHA code, and callbacks for the
The shims contain all of the code and definitions that are
specific to the NFS servers.
They setup the server-specific callbacks and set the server
name for the sysctl and loader tunable variables.
sys/nfsserver/nfs_srvkrpc.c:
Configure the RPC code to call fhaold_assign() instead of
fha_assign().
sys/modules/nfsd/Makefile:
Add nfs_fha.c and nfs_fha_new.c.
sys/modules/nfsserver/Makefile:
Add nfs_fha_old.c.
Reviewed by: rmacklem
Sponsored by: Spectra Logic
MFC after: 2 weeks
2013-04-17 21:00:22 +00:00
|
|
|
if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp)))
|
|
|
|
lktype = LK_EXCLUSIVE;
|
|
|
|
}
|
2010-12-25 21:56:25 +00:00
|
|
|
nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
|
2009-05-04 15:23:58 +00:00
|
|
|
&credanon);
|
2011-01-05 18:46:05 +00:00
|
|
|
vfs_unbusy(mp);
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* For NFSv4 without a pseudo root fs, unexported file handles
|
|
|
|
* can be returned, so that Lookup works everywhere.
|
|
|
|
*/
|
|
|
|
if (!nd->nd_repstat && exp->nes_exflag == 0 &&
|
|
|
|
!(nd->nd_flag & ND_NFSV4)) {
|
|
|
|
vput(*vpp);
|
|
|
|
nd->nd_repstat = EACCES;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Personally, I've never seen any point in requiring a
|
|
|
|
* reserved port#, since only in the rare case where the
|
|
|
|
* clients are all boxes with secure system priviledges,
|
|
|
|
* does it provide any enhanced security, but... some people
|
|
|
|
* believe it to be useful and keep putting this code back in.
|
|
|
|
* (There is also some "security checker" out there that
|
|
|
|
* complains if the nfs server doesn't enforce this.)
|
|
|
|
* However, note the following:
|
|
|
|
* RFC3530 (NFSv4) specifies that a reserved port# not be
|
|
|
|
* required.
|
|
|
|
* RFC2623 recommends that, if a reserved port# is checked for,
|
|
|
|
* that there be a way to turn that off--> ifdef'd.
|
|
|
|
*/
|
|
|
|
#ifdef NFS_REQRSVPORT
|
|
|
|
if (!nd->nd_repstat) {
|
2009-05-14 21:39:08 +00:00
|
|
|
struct sockaddr_in *saddr;
|
|
|
|
struct sockaddr_in6 *saddr6;
|
|
|
|
|
|
|
|
saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
|
|
|
|
saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
|
|
|
|
if (!(nd->nd_flag & ND_NFSV4) &&
|
|
|
|
((saddr->sin_family == AF_INET &&
|
|
|
|
ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
|
|
|
|
(saddr6->sin6_family == AF_INET6 &&
|
|
|
|
ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
|
|
|
|
vput(*vpp);
|
|
|
|
nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
|
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
#endif /* NFS_REQRSVPORT */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check/setup credentials.
|
|
|
|
*/
|
|
|
|
if (!nd->nd_repstat) {
|
|
|
|
nd->nd_saveduid = nd->nd_cred->cr_uid;
|
|
|
|
nd->nd_repstat = nfsd_excred(nd, exp, credanon);
|
|
|
|
if (nd->nd_repstat)
|
|
|
|
vput(*vpp);
|
|
|
|
}
|
2009-05-09 18:09:17 +00:00
|
|
|
if (credanon != NULL)
|
|
|
|
crfree(credanon);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (nd->nd_repstat) {
|
|
|
|
if (startwrite)
|
|
|
|
vn_finished_write(mp);
|
|
|
|
*vpp = NULL;
|
2011-01-06 19:50:11 +00:00
|
|
|
if (mpp != NULL)
|
|
|
|
*mpp = NULL;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE2(0, nd);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* glue for fp.
|
|
|
|
*/
|
Merge Capsicum overhaul:
- Capability is no longer separate descriptor type. Now every descriptor
has set of its own capability rights.
- The cap_new(2) system call is left, but it is no longer documented and
should not be used in new code.
- The new syscall cap_rights_limit(2) should be used instead of
cap_new(2), which limits capability rights of the given descriptor
without creating a new one.
- The cap_getrights(2) syscall is renamed to cap_rights_get(2).
- If CAP_IOCTL capability right is present we can further reduce allowed
ioctls list with the new cap_ioctls_limit(2) syscall. List of allowed
ioctls can be retrived with cap_ioctls_get(2) syscall.
- If CAP_FCNTL capability right is present we can further reduce fcntls
that can be used with the new cap_fcntls_limit(2) syscall and retrive
them with cap_fcntls_get(2).
- To support ioctl and fcntl white-listing the filedesc structure was
heavly modified.
- The audit subsystem, kdump and procstat tools were updated to
recognize new syscalls.
- Capability rights were revised and eventhough I tried hard to provide
backward API and ABI compatibility there are some incompatible changes
that are described in detail below:
CAP_CREATE old behaviour:
- Allow for openat(2)+O_CREAT.
- Allow for linkat(2).
- Allow for symlinkat(2).
CAP_CREATE new behaviour:
- Allow for openat(2)+O_CREAT.
Added CAP_LINKAT:
- Allow for linkat(2). ABI: Reuses CAP_RMDIR bit.
- Allow to be target for renameat(2).
Added CAP_SYMLINKAT:
- Allow for symlinkat(2).
Removed CAP_DELETE. Old behaviour:
- Allow for unlinkat(2) when removing non-directory object.
- Allow to be source for renameat(2).
Removed CAP_RMDIR. Old behaviour:
- Allow for unlinkat(2) when removing directory.
Added CAP_RENAMEAT:
- Required for source directory for the renameat(2) syscall.
Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR):
- Allow for unlinkat(2) on any object.
- Required if target of renameat(2) exists and will be removed by this
call.
Removed CAP_MAPEXEC.
CAP_MMAP old behaviour:
- Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and
PROT_WRITE.
CAP_MMAP new behaviour:
- Allow for mmap(2)+PROT_NONE.
Added CAP_MMAP_R:
- Allow for mmap(PROT_READ).
Added CAP_MMAP_W:
- Allow for mmap(PROT_WRITE).
Added CAP_MMAP_X:
- Allow for mmap(PROT_EXEC).
Added CAP_MMAP_RW:
- Allow for mmap(PROT_READ | PROT_WRITE).
Added CAP_MMAP_RX:
- Allow for mmap(PROT_READ | PROT_EXEC).
Added CAP_MMAP_WX:
- Allow for mmap(PROT_WRITE | PROT_EXEC).
Added CAP_MMAP_RWX:
- Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC).
Renamed CAP_MKDIR to CAP_MKDIRAT.
Renamed CAP_MKFIFO to CAP_MKFIFOAT.
Renamed CAP_MKNODE to CAP_MKNODEAT.
CAP_READ old behaviour:
- Allow pread(2).
- Disallow read(2), readv(2) (if there is no CAP_SEEK).
CAP_READ new behaviour:
- Allow read(2), readv(2).
- Disallow pread(2) (CAP_SEEK was also required).
CAP_WRITE old behaviour:
- Allow pwrite(2).
- Disallow write(2), writev(2) (if there is no CAP_SEEK).
CAP_WRITE new behaviour:
- Allow write(2), writev(2).
- Disallow pwrite(2) (CAP_SEEK was also required).
Added convinient defines:
#define CAP_PREAD (CAP_SEEK | CAP_READ)
#define CAP_PWRITE (CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ)
#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL)
#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W)
#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X)
#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X)
#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X)
#define CAP_RECV CAP_READ
#define CAP_SEND CAP_WRITE
#define CAP_SOCK_CLIENT \
(CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN)
#define CAP_SOCK_SERVER \
(CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \
CAP_SETSOCKOPT | CAP_SHUTDOWN)
Added defines for backward API compatibility:
#define CAP_MAPEXEC CAP_MMAP_X
#define CAP_DELETE CAP_UNLINKAT
#define CAP_MKDIR CAP_MKDIRAT
#define CAP_RMDIR CAP_UNLINKAT
#define CAP_MKFIFO CAP_MKFIFOAT
#define CAP_MKNOD CAP_MKNODAT
#define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER)
Sponsored by: The FreeBSD Foundation
Reviewed by: Christoph Mallon <christoph.mallon@gmx.de>
Many aspects discussed with: rwatson, benl, jonathan
ABI compatibility discussed with: kib
2013-03-02 00:53:12 +00:00
|
|
|
static int
|
2009-05-04 15:23:58 +00:00
|
|
|
fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
|
|
|
|
{
|
|
|
|
struct filedesc *fdp;
|
|
|
|
struct file *fp;
|
2011-07-16 08:51:09 +00:00
|
|
|
int error = 0;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
fdp = p->td_proc->p_fd;
|
Merge Capsicum overhaul:
- Capability is no longer separate descriptor type. Now every descriptor
has set of its own capability rights.
- The cap_new(2) system call is left, but it is no longer documented and
should not be used in new code.
- The new syscall cap_rights_limit(2) should be used instead of
cap_new(2), which limits capability rights of the given descriptor
without creating a new one.
- The cap_getrights(2) syscall is renamed to cap_rights_get(2).
- If CAP_IOCTL capability right is present we can further reduce allowed
ioctls list with the new cap_ioctls_limit(2) syscall. List of allowed
ioctls can be retrived with cap_ioctls_get(2) syscall.
- If CAP_FCNTL capability right is present we can further reduce fcntls
that can be used with the new cap_fcntls_limit(2) syscall and retrive
them with cap_fcntls_get(2).
- To support ioctl and fcntl white-listing the filedesc structure was
heavly modified.
- The audit subsystem, kdump and procstat tools were updated to
recognize new syscalls.
- Capability rights were revised and eventhough I tried hard to provide
backward API and ABI compatibility there are some incompatible changes
that are described in detail below:
CAP_CREATE old behaviour:
- Allow for openat(2)+O_CREAT.
- Allow for linkat(2).
- Allow for symlinkat(2).
CAP_CREATE new behaviour:
- Allow for openat(2)+O_CREAT.
Added CAP_LINKAT:
- Allow for linkat(2). ABI: Reuses CAP_RMDIR bit.
- Allow to be target for renameat(2).
Added CAP_SYMLINKAT:
- Allow for symlinkat(2).
Removed CAP_DELETE. Old behaviour:
- Allow for unlinkat(2) when removing non-directory object.
- Allow to be source for renameat(2).
Removed CAP_RMDIR. Old behaviour:
- Allow for unlinkat(2) when removing directory.
Added CAP_RENAMEAT:
- Required for source directory for the renameat(2) syscall.
Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR):
- Allow for unlinkat(2) on any object.
- Required if target of renameat(2) exists and will be removed by this
call.
Removed CAP_MAPEXEC.
CAP_MMAP old behaviour:
- Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and
PROT_WRITE.
CAP_MMAP new behaviour:
- Allow for mmap(2)+PROT_NONE.
Added CAP_MMAP_R:
- Allow for mmap(PROT_READ).
Added CAP_MMAP_W:
- Allow for mmap(PROT_WRITE).
Added CAP_MMAP_X:
- Allow for mmap(PROT_EXEC).
Added CAP_MMAP_RW:
- Allow for mmap(PROT_READ | PROT_WRITE).
Added CAP_MMAP_RX:
- Allow for mmap(PROT_READ | PROT_EXEC).
Added CAP_MMAP_WX:
- Allow for mmap(PROT_WRITE | PROT_EXEC).
Added CAP_MMAP_RWX:
- Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC).
Renamed CAP_MKDIR to CAP_MKDIRAT.
Renamed CAP_MKFIFO to CAP_MKFIFOAT.
Renamed CAP_MKNODE to CAP_MKNODEAT.
CAP_READ old behaviour:
- Allow pread(2).
- Disallow read(2), readv(2) (if there is no CAP_SEEK).
CAP_READ new behaviour:
- Allow read(2), readv(2).
- Disallow pread(2) (CAP_SEEK was also required).
CAP_WRITE old behaviour:
- Allow pwrite(2).
- Disallow write(2), writev(2) (if there is no CAP_SEEK).
CAP_WRITE new behaviour:
- Allow write(2), writev(2).
- Disallow pwrite(2) (CAP_SEEK was also required).
Added convinient defines:
#define CAP_PREAD (CAP_SEEK | CAP_READ)
#define CAP_PWRITE (CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ)
#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL)
#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W)
#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X)
#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X)
#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X)
#define CAP_RECV CAP_READ
#define CAP_SEND CAP_WRITE
#define CAP_SOCK_CLIENT \
(CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN)
#define CAP_SOCK_SERVER \
(CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \
CAP_SETSOCKOPT | CAP_SHUTDOWN)
Added defines for backward API compatibility:
#define CAP_MAPEXEC CAP_MMAP_X
#define CAP_DELETE CAP_UNLINKAT
#define CAP_MKDIR CAP_MKDIRAT
#define CAP_RMDIR CAP_UNLINKAT
#define CAP_MKFIFO CAP_MKFIFOAT
#define CAP_MKNOD CAP_MKNODAT
#define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER)
Sponsored by: The FreeBSD Foundation
Reviewed by: Christoph Mallon <christoph.mallon@gmx.de>
Many aspects discussed with: rwatson, benl, jonathan
ABI compatibility discussed with: kib
2013-03-02 00:53:12 +00:00
|
|
|
if (fd < 0 || fd >= fdp->fd_nfiles ||
|
|
|
|
(fp = fdp->fd_ofiles[fd].fde_file) == NULL) {
|
2011-07-16 08:51:09 +00:00
|
|
|
error = EBADF;
|
|
|
|
goto out;
|
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
*fpp = fp;
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
|
|
|
return (error);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2009-05-14 21:39:08 +00:00
|
|
|
* Called from nfssvc() to update the exports list. Just call
|
2009-05-04 15:23:58 +00:00
|
|
|
* vfs_export(). This has to be done, since the v4 root fake fs isn't
|
|
|
|
* in the mount list.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
|
|
|
|
{
|
|
|
|
struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
|
2011-07-16 08:51:09 +00:00
|
|
|
int error = 0;
|
2009-05-04 15:23:58 +00:00
|
|
|
struct nameidata nd;
|
|
|
|
fhandle_t fh;
|
|
|
|
|
|
|
|
error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
|
2011-01-02 21:34:01 +00:00
|
|
|
if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
|
2009-05-04 15:23:58 +00:00
|
|
|
nfs_rootfhset = 0;
|
2011-01-02 21:34:01 +00:00
|
|
|
else if (error == 0) {
|
2011-07-16 08:51:09 +00:00
|
|
|
if (nfsexargp->fspec == NULL) {
|
|
|
|
error = EPERM;
|
|
|
|
goto out;
|
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* If fspec != NULL, this is the v4root path.
|
|
|
|
*/
|
2012-10-22 17:50:54 +00:00
|
|
|
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
|
2009-05-04 15:23:58 +00:00
|
|
|
nfsexargp->fspec, p);
|
|
|
|
if ((error = namei(&nd)) != 0)
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
error = nfsvno_getfh(nd.ni_vp, &fh, p);
|
|
|
|
vrele(nd.ni_vp);
|
|
|
|
if (!error) {
|
|
|
|
nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
|
|
|
|
NFSBCOPY((caddr_t)&fh,
|
|
|
|
nfs_rootfh.nfsrvfh_data,
|
|
|
|
sizeof (fhandle_t));
|
|
|
|
nfs_rootfhset = 1;
|
|
|
|
}
|
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * This function needs to test to see if the system is near its limit
 * for memory allocation via malloc() or mget() and return True iff
 * either of these resources is near its limit.
 * XXX (For now, this is just a stub.)  No real resource is examined:
 * while nfsrv_testmalloclimit is non-zero, every 1000th call returns 1,
 * and a message is printed for the first such call and for every 100th
 * one after it.  While nfsrv_testmalloclimit is zero, 0 is returned and
 * no call is counted.
 */
int nfsrv_testmalloclimit = 0;
int
nfsrv_mallocmget_limit(void)
{
	/*
	 * Both counters are reset when they reach their period, so they
	 * stay small.  Counters that only ever grow would eventually
	 * overflow, which is undefined behavior for a signed int.
	 */
	static unsigned int ncalls = 0;		/* calls since last hit */
	static unsigned int nhits = 0;		/* hits since last message */

	if (nfsrv_testmalloclimit == 0)
		return (0);
	if (++ncalls < 1000)
		return (0);
	ncalls = 0;

	if (nhits == 0)
		printf("nfsd: malloc/mget near limit\n");
	if (++nhits == 100)
		nhits = 0;
	return (1);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* BSD specific initialization of a mount point.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
nfsd_mntinit(void)
|
|
|
|
{
|
|
|
|
static int inited = 0;
|
|
|
|
|
|
|
|
if (inited)
|
|
|
|
return;
|
|
|
|
inited = 1;
|
|
|
|
nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
|
|
|
|
TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
|
2012-04-20 06:50:44 +00:00
|
|
|
TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist);
|
2009-05-04 15:23:58 +00:00
|
|
|
nfsv4root_mnt.mnt_export = NULL;
|
|
|
|
TAILQ_INIT(&nfsv4root_opt);
|
|
|
|
TAILQ_INIT(&nfsv4root_newopt);
|
|
|
|
nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
|
|
|
|
nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
|
|
|
|
nfsv4root_mnt.mnt_nvnodelistsize = 0;
|
2012-04-20 06:50:44 +00:00
|
|
|
nfsv4root_mnt.mnt_activevnodelistsize = 0;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get a vnode for a file handle, without checking exports, etc.
|
|
|
|
*/
|
|
|
|
struct vnode *
|
|
|
|
nfsvno_getvp(fhandle_t *fhp)
|
|
|
|
{
|
|
|
|
struct mount *mp;
|
|
|
|
struct vnode *vp;
|
|
|
|
int error;
|
|
|
|
|
2011-01-05 18:46:05 +00:00
|
|
|
mp = vfs_busyfs(&fhp->fh_fsid);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (mp == NULL)
|
|
|
|
return (NULL);
|
2011-05-22 01:07:54 +00:00
|
|
|
error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp);
|
2011-01-05 18:46:05 +00:00
|
|
|
vfs_unbusy(mp);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (error)
|
|
|
|
return (NULL);
|
|
|
|
return (vp);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Do a local VOP_ADVLOCK().
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
|
2009-05-17 19:33:48 +00:00
|
|
|
u_int64_t end, struct thread *td)
|
2009-05-04 15:23:58 +00:00
|
|
|
{
|
2011-07-16 08:51:09 +00:00
|
|
|
int error = 0;
|
2009-05-04 15:23:58 +00:00
|
|
|
struct flock fl;
|
|
|
|
u_int64_t tlen;
|
|
|
|
|
2010-09-19 01:05:19 +00:00
|
|
|
if (nfsrv_dolocallocks == 0)
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2011-01-02 19:58:39 +00:00
|
|
|
|
|
|
|
/* Check for VI_DOOMED here, so that VOP_ADVLOCK() isn't performed. */
|
2011-07-16 08:51:09 +00:00
|
|
|
if ((vp->v_iflag & VI_DOOMED) != 0) {
|
|
|
|
error = EPERM;
|
|
|
|
goto out;
|
|
|
|
}
|
2011-01-02 19:58:39 +00:00
|
|
|
|
2009-05-04 15:23:58 +00:00
|
|
|
fl.l_whence = SEEK_SET;
|
|
|
|
fl.l_type = ftype;
|
|
|
|
fl.l_start = (off_t)first;
|
|
|
|
if (end == NFS64BITSSET) {
|
|
|
|
fl.l_len = 0;
|
|
|
|
} else {
|
|
|
|
tlen = end - first;
|
|
|
|
fl.l_len = (off_t)tlen;
|
|
|
|
}
|
|
|
|
/*
|
2009-05-17 19:33:48 +00:00
|
|
|
* For FreeBSD8, the l_pid and l_sysid must be set to the same
|
|
|
|
* values for all calls, so that all locks will be held by the
|
|
|
|
* nfsd server. (The nfsd server handles conflicts between the
|
|
|
|
* various clients.)
|
|
|
|
* Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
|
|
|
|
* bytes, so it can't be put in l_sysid.
|
2009-05-04 15:23:58 +00:00
|
|
|
*/
|
2009-05-21 01:50:27 +00:00
|
|
|
if (nfsv4_sysid == 0)
|
|
|
|
nfsv4_sysid = nlm_acquire_next_sysid();
|
2009-05-17 19:33:48 +00:00
|
|
|
fl.l_pid = (pid_t)0;
|
2009-05-21 01:50:27 +00:00
|
|
|
fl.l_sysid = (int)nfsv4_sysid;
|
2009-05-17 19:33:48 +00:00
|
|
|
|
2011-07-16 08:05:26 +00:00
|
|
|
NFSVOPUNLOCK(vp, 0);
|
2010-09-19 01:05:19 +00:00
|
|
|
if (ftype == F_UNLCK)
|
|
|
|
error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
|
|
|
|
(F_POSIX | F_REMOTE));
|
|
|
|
else
|
|
|
|
error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
|
|
|
|
(F_POSIX | F_REMOTE));
|
2011-07-16 08:05:26 +00:00
|
|
|
NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check the nfsv4 root exports.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_v4rootexport(struct nfsrv_descript *nd)
|
|
|
|
{
|
|
|
|
struct ucred *credanon;
|
2011-07-16 08:51:09 +00:00
|
|
|
int exflags, error = 0, numsecflavor, *secflavors, i;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
|
2009-05-14 21:39:08 +00:00
|
|
|
&credanon, &numsecflavor, &secflavors);
|
2011-07-16 08:51:09 +00:00
|
|
|
if (error) {
|
|
|
|
error = NFSERR_PROGUNAVAIL;
|
|
|
|
goto out;
|
|
|
|
}
|
2009-05-09 18:09:17 +00:00
|
|
|
if (credanon != NULL)
|
|
|
|
crfree(credanon);
|
2009-05-14 21:39:08 +00:00
|
|
|
for (i = 0; i < numsecflavor; i++) {
|
|
|
|
if (secflavors[i] == AUTH_SYS)
|
|
|
|
nd->nd_flag |= ND_EXAUTHSYS;
|
|
|
|
else if (secflavors[i] == RPCSEC_GSS_KRB5)
|
|
|
|
nd->nd_flag |= ND_EXGSS;
|
|
|
|
else if (secflavors[i] == RPCSEC_GSS_KRB5I)
|
|
|
|
nd->nd_flag |= ND_EXGSSINTEGRITY;
|
|
|
|
else if (secflavors[i] == RPCSEC_GSS_KRB5P)
|
|
|
|
nd->nd_flag |= ND_EXGSSPRIVACY;
|
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
|
|
|
return (error);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Nfs server psuedo system call for the nfsd's
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* MPSAFE
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
|
|
|
|
{
|
|
|
|
struct file *fp;
|
2009-05-12 16:04:51 +00:00
|
|
|
struct nfsd_addsock_args sockarg;
|
|
|
|
struct nfsd_nfsd_args nfsdarg;
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
cap_rights_t rights;
|
2009-05-04 15:23:58 +00:00
|
|
|
int error;
|
|
|
|
|
|
|
|
if (uap->flag & NFSSVC_NFSDADDSOCK) {
|
2009-05-12 16:04:51 +00:00
|
|
|
error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
|
2009-05-04 15:23:58 +00:00
|
|
|
if (error)
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2011-08-11 12:30:23 +00:00
|
|
|
/*
|
|
|
|
* Since we don't know what rights might be required,
|
|
|
|
* pretend that we need them all. It is better to be too
|
|
|
|
* careful than too reckless.
|
|
|
|
*/
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
error = fget(td, sockarg.sock,
|
|
|
|
cap_rights_init(&rights, CAP_SOCK_SERVER), &fp);
|
|
|
|
if (error != 0)
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
if (fp->f_type != DTYPE_SOCKET) {
|
|
|
|
fdrop(fp, td);
|
2011-07-16 08:51:09 +00:00
|
|
|
error = EPERM;
|
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
error = nfsrvd_addsock(fp);
|
|
|
|
fdrop(fp, td);
|
|
|
|
} else if (uap->flag & NFSSVC_NFSDNFSD) {
|
2011-07-16 08:51:09 +00:00
|
|
|
if (uap->argp == NULL) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
2009-05-12 16:04:51 +00:00
|
|
|
error = copyin(uap->argp, (caddr_t)&nfsdarg,
|
|
|
|
sizeof (nfsdarg));
|
|
|
|
if (error)
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-12 16:04:51 +00:00
|
|
|
error = nfsrvd_nfsd(td, &nfsdarg);
|
2009-05-04 15:23:58 +00:00
|
|
|
} else {
|
|
|
|
error = nfssvc_srvcall(td, uap, td->td_ucred);
|
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
|
|
|
|
{
|
|
|
|
struct nfsex_args export;
|
|
|
|
struct file *fp = NULL;
|
|
|
|
int stablefd, len;
|
|
|
|
struct nfsd_clid adminrevoke;
|
|
|
|
struct nfsd_dumplist dumplist;
|
|
|
|
struct nfsd_dumpclients *dumpclients;
|
|
|
|
struct nfsd_dumplocklist dumplocklist;
|
|
|
|
struct nfsd_dumplocks *dumplocks;
|
|
|
|
struct nameidata nd;
|
|
|
|
vnode_t vp;
|
2012-10-14 22:33:17 +00:00
|
|
|
int error = EINVAL, igotlock;
|
2011-01-14 23:30:35 +00:00
|
|
|
struct proc *procp;
|
2012-10-14 22:33:17 +00:00
|
|
|
static int suspend_nfsd = 0;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
if (uap->flag & NFSSVC_PUBLICFH) {
|
|
|
|
NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
|
|
|
|
sizeof (fhandle_t));
|
|
|
|
error = copyin(uap->argp,
|
|
|
|
&nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
|
|
|
|
if (!error)
|
|
|
|
nfs_pubfhset = 1;
|
|
|
|
} else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
|
|
|
|
error = copyin(uap->argp,(caddr_t)&export,
|
|
|
|
sizeof (struct nfsex_args));
|
|
|
|
if (!error)
|
|
|
|
error = nfsrv_v4rootexport(&export, cred, p);
|
|
|
|
} else if (uap->flag & NFSSVC_NOPUBLICFH) {
|
|
|
|
nfs_pubfhset = 0;
|
|
|
|
error = 0;
|
|
|
|
} else if (uap->flag & NFSSVC_STABLERESTART) {
|
|
|
|
error = copyin(uap->argp, (caddr_t)&stablefd,
|
|
|
|
sizeof (int));
|
|
|
|
if (!error)
|
|
|
|
error = fp_getfvp(p, stablefd, &fp, &vp);
|
|
|
|
if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
|
|
|
|
error = EBADF;
|
|
|
|
if (!error && newnfs_numnfsd != 0)
|
|
|
|
error = EPERM;
|
|
|
|
if (!error) {
|
|
|
|
nfsrv_stablefirst.nsf_fp = fp;
|
|
|
|
nfsrv_setupstable(p);
|
|
|
|
}
|
|
|
|
} else if (uap->flag & NFSSVC_ADMINREVOKE) {
|
|
|
|
error = copyin(uap->argp, (caddr_t)&adminrevoke,
|
|
|
|
sizeof (struct nfsd_clid));
|
|
|
|
if (!error)
|
|
|
|
error = nfsrv_adminrevoke(&adminrevoke, p);
|
|
|
|
} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
|
|
|
|
error = copyin(uap->argp, (caddr_t)&dumplist,
|
|
|
|
sizeof (struct nfsd_dumplist));
|
|
|
|
if (!error && (dumplist.ndl_size < 1 ||
|
|
|
|
dumplist.ndl_size > NFSRV_MAXDUMPLIST))
|
|
|
|
error = EPERM;
|
|
|
|
if (!error) {
|
|
|
|
len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
|
|
|
|
dumpclients = (struct nfsd_dumpclients *)malloc(len,
|
|
|
|
M_TEMP, M_WAITOK);
|
|
|
|
nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
|
|
|
|
error = copyout(dumpclients,
|
|
|
|
CAST_USER_ADDR_T(dumplist.ndl_list), len);
|
|
|
|
free((caddr_t)dumpclients, M_TEMP);
|
|
|
|
}
|
|
|
|
} else if (uap->flag & NFSSVC_DUMPLOCKS) {
|
|
|
|
error = copyin(uap->argp, (caddr_t)&dumplocklist,
|
|
|
|
sizeof (struct nfsd_dumplocklist));
|
|
|
|
if (!error && (dumplocklist.ndllck_size < 1 ||
|
|
|
|
dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
|
|
|
|
error = EPERM;
|
|
|
|
if (!error)
|
|
|
|
error = nfsrv_lookupfilename(&nd,
|
|
|
|
dumplocklist.ndllck_fname, p);
|
|
|
|
if (!error) {
|
|
|
|
len = sizeof (struct nfsd_dumplocks) *
|
|
|
|
dumplocklist.ndllck_size;
|
|
|
|
dumplocks = (struct nfsd_dumplocks *)malloc(len,
|
|
|
|
M_TEMP, M_WAITOK);
|
|
|
|
nfsrv_dumplocks(nd.ni_vp, dumplocks,
|
|
|
|
dumplocklist.ndllck_size, p);
|
|
|
|
vput(nd.ni_vp);
|
|
|
|
error = copyout(dumplocks,
|
|
|
|
CAST_USER_ADDR_T(dumplocklist.ndllck_list), len);
|
|
|
|
free((caddr_t)dumplocks, M_TEMP);
|
|
|
|
}
|
2011-01-14 23:30:35 +00:00
|
|
|
} else if (uap->flag & NFSSVC_BACKUPSTABLE) {
|
|
|
|
procp = p->td_proc;
|
|
|
|
PROC_LOCK(procp);
|
|
|
|
nfsd_master_pid = procp->p_pid;
|
|
|
|
bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
|
|
|
|
nfsd_master_start = procp->p_stats->p_start;
|
|
|
|
nfsd_master_proc = procp;
|
|
|
|
PROC_UNLOCK(procp);
|
2012-10-14 22:33:17 +00:00
|
|
|
} else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) {
|
|
|
|
NFSLOCKV4ROOTMUTEX();
|
|
|
|
if (suspend_nfsd == 0) {
|
|
|
|
/* Lock out all nfsd threads */
|
|
|
|
do {
|
|
|
|
igotlock = nfsv4_lock(&nfsd_suspend_lock, 1,
|
|
|
|
NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
|
|
|
|
} while (igotlock == 0 && suspend_nfsd == 0);
|
|
|
|
suspend_nfsd = 1;
|
|
|
|
}
|
|
|
|
NFSUNLOCKV4ROOTMUTEX();
|
|
|
|
error = 0;
|
|
|
|
} else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) {
|
|
|
|
NFSLOCKV4ROOTMUTEX();
|
|
|
|
if (suspend_nfsd != 0) {
|
|
|
|
nfsv4_unlock(&nfsd_suspend_lock, 0);
|
|
|
|
suspend_nfsd = 0;
|
|
|
|
}
|
|
|
|
NFSUNLOCKV4ROOTMUTEX();
|
|
|
|
error = 0;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
NFSEXITCODE(error);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2009-05-14 21:39:08 +00:00
|
|
|
/*
|
|
|
|
* Check exports.
|
|
|
|
* Returns 0 if ok, 1 otherwise.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This seems odd, but allow the case where the security flavor
|
|
|
|
* list is empty. This happens when NFSv4 is traversing non-exported
|
|
|
|
* file systems. Exported file systems should always have a non-empty
|
|
|
|
* security flavor list.
|
|
|
|
*/
|
|
|
|
if (exp->nes_numsecflavor == 0)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
for (i = 0; i < exp->nes_numsecflavor; i++) {
|
|
|
|
/*
|
|
|
|
* The tests for privacy and integrity must be first,
|
|
|
|
* since ND_GSS is set for everything but AUTH_SYS.
|
|
|
|
*/
|
|
|
|
if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
|
|
|
|
(nd->nd_flag & ND_GSSPRIVACY))
|
|
|
|
return (0);
|
|
|
|
if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
|
|
|
|
(nd->nd_flag & ND_GSSINTEGRITY))
|
|
|
|
return (0);
|
|
|
|
if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
|
|
|
|
(nd->nd_flag & ND_GSS))
|
|
|
|
return (0);
|
|
|
|
if (exp->nes_secflavors[i] == AUTH_SYS &&
|
|
|
|
(nd->nd_flag & ND_GSS) == 0)
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
2010-10-22 21:38:56 +00:00
|
|
|
/*
|
|
|
|
* Calculate a hash value for the fid in a file handle.
|
|
|
|
*/
|
2010-10-23 22:28:29 +00:00
|
|
|
uint32_t
|
2010-10-22 21:38:56 +00:00
|
|
|
nfsrv_hashfh(fhandle_t *fhp)
|
|
|
|
{
|
2010-10-23 22:28:29 +00:00
|
|
|
uint32_t hashval;
|
2010-10-22 21:38:56 +00:00
|
|
|
|
2010-10-23 22:28:29 +00:00
|
|
|
hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
|
2010-10-22 21:38:56 +00:00
|
|
|
return (hashval);
|
|
|
|
}
|
|
|
|
|
2011-01-14 23:30:35 +00:00
|
|
|
/*
|
|
|
|
* Signal the userland master nfsd to backup the stable restart file.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
nfsrv_backupstable(void)
|
|
|
|
{
|
|
|
|
struct proc *procp;
|
|
|
|
|
|
|
|
if (nfsd_master_proc != NULL) {
|
|
|
|
procp = pfind(nfsd_master_pid);
|
|
|
|
/* Try to make sure it is the correct process. */
|
|
|
|
if (procp == nfsd_master_proc &&
|
|
|
|
procp->p_stats->p_start.tv_sec ==
|
|
|
|
nfsd_master_start.tv_sec &&
|
|
|
|
procp->p_stats->p_start.tv_usec ==
|
|
|
|
nfsd_master_start.tv_usec &&
|
|
|
|
strcmp(procp->p_comm, nfsd_master_comm) == 0)
|
2011-09-16 13:58:51 +00:00
|
|
|
kern_psignal(procp, SIGUSR2);
|
2011-01-14 23:30:35 +00:00
|
|
|
else
|
|
|
|
nfsd_master_proc = NULL;
|
|
|
|
|
|
|
|
if (procp != NULL)
|
|
|
|
PROC_UNLOCK(procp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-05-04 15:23:58 +00:00
|
|
|
extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Called once to initialize data structures...
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
nfsd_modevent(module_t mod, int type, void *data)
|
|
|
|
{
|
2013-08-14 21:11:26 +00:00
|
|
|
int error = 0, i;
|
2009-05-04 15:23:58 +00:00
|
|
|
static int loaded = 0;
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case MOD_LOAD:
|
|
|
|
if (loaded)
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
newnfs_portinit();
|
2013-08-14 21:11:26 +00:00
|
|
|
for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
|
|
|
|
snprintf(nfsrchash_table[i].lock_name,
|
|
|
|
sizeof(nfsrchash_table[i].lock_name), "nfsrc_tcp%d",
|
|
|
|
i);
|
|
|
|
mtx_init(&nfsrchash_table[i].mtx,
|
|
|
|
nfsrchash_table[i].lock_name, NULL, MTX_DEF);
|
2014-01-03 15:09:59 +00:00
|
|
|
snprintf(nfsrcahash_table[i].lock_name,
|
|
|
|
sizeof(nfsrcahash_table[i].lock_name), "nfsrc_tcpa%d",
|
|
|
|
i);
|
|
|
|
mtx_init(&nfsrcahash_table[i].mtx,
|
|
|
|
nfsrcahash_table[i].lock_name, NULL, MTX_DEF);
|
2013-08-14 21:11:26 +00:00
|
|
|
}
|
|
|
|
mtx_init(&nfsrc_udpmtx, "nfs_udpcache_mutex", NULL, MTX_DEF);
|
2009-05-04 15:23:58 +00:00
|
|
|
mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF);
|
|
|
|
mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL,
|
|
|
|
MTX_DEF);
|
|
|
|
lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
|
|
|
|
nfsrvd_initcache();
|
|
|
|
nfsd_init();
|
|
|
|
NFSD_LOCK();
|
|
|
|
nfsrvd_init(0);
|
|
|
|
NFSD_UNLOCK();
|
|
|
|
nfsd_mntinit();
|
|
|
|
#ifdef VV_DISABLEDELEG
|
|
|
|
vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
|
|
|
|
vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
|
|
|
|
#endif
|
|
|
|
nfsd_call_servertimer = nfsrv_servertimer;
|
|
|
|
nfsd_call_nfsd = nfssvc_nfsd;
|
|
|
|
loaded = 1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case MOD_UNLOAD:
|
|
|
|
if (newnfs_numnfsd != 0) {
|
|
|
|
error = EBUSY;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef VV_DISABLEDELEG
|
|
|
|
vn_deleg_ops.vndeleg_recall = NULL;
|
|
|
|
vn_deleg_ops.vndeleg_disable = NULL;
|
|
|
|
#endif
|
|
|
|
nfsd_call_servertimer = NULL;
|
|
|
|
nfsd_call_nfsd = NULL;
|
2011-01-12 23:34:09 +00:00
|
|
|
|
2011-04-10 20:43:07 +00:00
|
|
|
/* Clean out all NFSv4 state. */
|
|
|
|
nfsrv_throwawayallstate(curthread);
|
|
|
|
|
2011-01-12 23:34:09 +00:00
|
|
|
/* Clean the NFS server reply cache */
|
|
|
|
nfsrvd_cleancache();
|
|
|
|
|
2011-04-10 20:43:07 +00:00
|
|
|
/* Free up the krpc server pool. */
|
|
|
|
if (nfsrvd_pool != NULL)
|
|
|
|
svcpool_destroy(nfsrvd_pool);
|
|
|
|
|
2009-05-04 15:23:58 +00:00
|
|
|
/* and get rid of the locks */
|
2014-01-03 15:09:59 +00:00
|
|
|
for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
|
2013-08-14 21:11:26 +00:00
|
|
|
mtx_destroy(&nfsrchash_table[i].mtx);
|
2014-01-03 15:09:59 +00:00
|
|
|
mtx_destroy(&nfsrcahash_table[i].mtx);
|
|
|
|
}
|
2013-08-14 21:11:26 +00:00
|
|
|
mtx_destroy(&nfsrc_udpmtx);
|
2009-05-04 15:23:58 +00:00
|
|
|
mtx_destroy(&nfs_v4root_mutex);
|
|
|
|
mtx_destroy(&nfsv4root_mnt.mnt_mtx);
|
|
|
|
lockdestroy(&nfsv4root_mnt.mnt_explock);
|
|
|
|
loaded = 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
error = EOPNOTSUPP;
|
|
|
|
break;
|
|
|
|
}
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
|
|
|
return (error);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
static moduledata_t nfsd_mod = {
|
|
|
|
"nfsd",
|
|
|
|
nfsd_modevent,
|
|
|
|
NULL,
|
|
|
|
};
|
|
|
|
DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);
|
|
|
|
|
|
|
|
/* So that loader and kldload(2) can find us, wherever we are.. */
|
|
|
|
MODULE_VERSION(nfsd, 1);
|
|
|
|
MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
|
2011-01-03 20:37:31 +00:00
|
|
|
MODULE_DEPEND(nfsd, nfslock, 1, 1, 1);
|
2009-05-22 01:15:07 +00:00
|
|
|
MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
|
2010-06-15 00:25:04 +00:00
|
|
|
MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
|
|
|
|
MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);
|
2009-05-04 15:23:58 +00:00
|
|
|
|