2009-05-04 15:23:58 +00:00
|
|
|
/*-
|
|
|
|
* Copyright (c) 1989, 1991, 1993, 1995
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
*
|
|
|
|
* This code is derived from software contributed to Berkeley by
|
|
|
|
* Rick Macklem at The University of Guelph.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2017-02-28 23:42:47 +00:00
|
|
|
* 3. Neither the name of the University nor the names of its contributors
|
2009-05-04 15:23:58 +00:00
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Socket operations for use by nfs
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "opt_kgssapi.h"
|
|
|
|
#include "opt_nfs.h"
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
|
|
|
#include <sys/kernel.h>
|
|
|
|
#include <sys/limits.h>
|
|
|
|
#include <sys/lock.h>
|
|
|
|
#include <sys/malloc.h>
|
|
|
|
#include <sys/mbuf.h>
|
|
|
|
#include <sys/mount.h>
|
|
|
|
#include <sys/mutex.h>
|
|
|
|
#include <sys/proc.h>
|
|
|
|
#include <sys/signalvar.h>
|
|
|
|
#include <sys/syscallsubr.h>
|
|
|
|
#include <sys/sysctl.h>
|
|
|
|
#include <sys/syslog.h>
|
|
|
|
#include <sys/vnode.h>
|
|
|
|
|
|
|
|
#include <rpc/rpc.h>
|
2014-07-01 20:47:16 +00:00
|
|
|
#include <rpc/krpc.h>
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
#include <kgssapi/krb5/kcrypto.h>
|
|
|
|
|
|
|
|
#include <fs/nfs/nfsport.h>
|
|
|
|
|
2011-06-18 23:02:53 +00:00
|
|
|
#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>

/*
 * Hook variables for the RPC "start" and "done" DTrace probes.
 * NOTE(review): the "234" in the names suggests one hook is shared by
 * NFSv2, NFSv3 and NFSv4 requests -- confirm against the provider code.
 */
dtrace_nfsclient_nfs23_start_probe_func_t
		dtrace_nfscl_nfs234_start_probe;

dtrace_nfsclient_nfs23_done_probe_func_t
		dtrace_nfscl_nfs234_done_probe;

/*
 * Registered probes by RPC type.
 * One start table and one done table per protocol version; every table
 * has NFSV41_NPROCS + 1 slots.
 */
uint32_t	nfscl_nfs2_start_probes[NFSV41_NPROCS + 1];
uint32_t	nfscl_nfs2_done_probes[NFSV41_NPROCS + 1];

uint32_t	nfscl_nfs3_start_probes[NFSV41_NPROCS + 1];
uint32_t	nfscl_nfs3_done_probes[NFSV41_NPROCS + 1];

uint32_t	nfscl_nfs4_start_probes[NFSV41_NPROCS + 1];
uint32_t	nfscl_nfs4_done_probes[NFSV41_NPROCS + 1];
#endif
|
|
|
|
|
2009-05-04 15:23:58 +00:00
|
|
|
NFSSTATESPINLOCK;
|
|
|
|
NFSREQSPINLOCK;
|
2012-12-08 22:52:39 +00:00
|
|
|
NFSDLOCKMUTEX;
|
Fix NFSv4.1 client recovery from NFS4ERR_BAD_SESSION errors.
For most NFSv4.1 servers, a NFS4ERR_BAD_SESSION error is a rare failure
that indicates that the server has lost session/open/lock state.
However, recent testing by cperciva@ against the AmazonEFS server found
several problems with client recovery from this due to it generating this
failure frequently.
Briefly, the problems fixed are:
- If all session slots were in use at the time of the failure, some processes
would continue to loop waiting for a slot on the old session forever.
- If an RPC that doesn't use open/lock state failed with NFS4ERR_BAD_SESSION,
it would fail the RPC/syscall instead of initiating recovery and then
looping to retry the RPC.
- If a successful reply to an RPC for an old session wasn't processed
until after a new session was created for a NFS4ERR_BAD_SESSION error,
it would erroneously update the new session and corrupt it.
- The use of the first element of the session list in the nfs mount
structure (which is always the current metadata session) was slightly
racey. With changes for the above problems it became more racey, so all
uses of this head pointer was wrapped with a NFSLOCKMNT()/NFSUNLOCKMNT().
- Although the kernel malloc() usually allocates more bytes than requested
and, as such, this wouldn't have caused problems, the allocation of a
session structure was 1 byte smaller than it should have been.
(Null termination byte for the string not included in byte count.)
There are probably still problems with a pNFS data server that fails
with NFS4ERR_BAD_SESSION, but I have no server that does this to test
against (the AmazonEFS server doesn't do pNFS), so I can't fix these yet.
Although this patch is fairly large, it should only affect the handling
of NFS4ERR_BAD_SESSION error replies from an NFSv4.1 server.
Thanks go to cperciva@ for the extension testing he did to help isolate/fix
these problems.
Reported by: cperciva
Tested by: cperciva
MFC after: 3 months
Differential Revision: https://reviews.freebsd.org/D8745
2016-12-23 23:14:53 +00:00
|
|
|
NFSCLSTATEMUTEX;
|
2016-08-12 22:44:59 +00:00
|
|
|
extern struct nfsstatsv1 nfsstatsv1;
|
2009-05-04 15:23:58 +00:00
|
|
|
extern struct nfsreqhead nfsd_reqq;
|
|
|
|
extern int nfscl_ticks;
|
|
|
|
extern void (*ncl_call_invalcaches)(struct vnode *);
|
2012-12-08 22:52:39 +00:00
|
|
|
extern int nfs_numnfscbd;
|
|
|
|
extern int nfscl_debuglevel;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
2012-12-08 22:52:39 +00:00
|
|
|
SVCPOOL *nfscbd_pool;
|
2009-05-04 15:23:58 +00:00
|
|
|
static int nfsrv_gsscallbackson = 0;
|
|
|
|
static int nfs_bufpackets = 4;
|
|
|
|
static int nfs_reconnects;
|
|
|
|
static int nfs3_jukebox_delay = 10;
|
|
|
|
static int nfs_skip_wcc_data_onerr = 1;
|
|
|
|
|
2011-05-15 20:52:43 +00:00
|
|
|
SYSCTL_DECL(_vfs_nfs);
|
2009-05-04 15:23:58 +00:00
|
|
|
|
2011-05-15 20:52:43 +00:00
|
|
|
SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0,
|
2009-05-04 15:23:58 +00:00
|
|
|
"Buffer reservation size 2 < x < 64");
|
2011-05-15 20:52:43 +00:00
|
|
|
SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0,
|
2009-05-04 15:23:58 +00:00
|
|
|
"Number of times the nfs client has had to reconnect");
|
2011-05-15 20:52:43 +00:00
|
|
|
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0,
|
2009-05-04 15:23:58 +00:00
|
|
|
"Number of seconds to delay a retry after receiving EJUKEBOX");
|
2011-05-15 20:52:43 +00:00
|
|
|
SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0,
|
2009-05-04 15:23:58 +00:00
|
|
|
"Disable weak cache consistency checking when server returns an error");
|
|
|
|
|
|
|
|
/*
 * Forward declarations of file-local helpers.  nfs_down() and nfs_up()
 * are called from nfs_feedback() to report a server as not responding
 * or as alive again.
 */
static void	nfs_down(struct nfsmount *, struct thread *, const char *,
		    int, int);
static void	nfs_up(struct nfsmount *, struct thread *, const char *,
		    int, int);
static int	nfs_msg(struct thread *, const char *, const char *, int);
|
|
|
|
|
|
|
|
struct nfs_cached_auth {
|
|
|
|
int ca_refs; /* refcount, including 1 from the cache */
|
|
|
|
uid_t ca_uid; /* uid that corresponds to this auth */
|
|
|
|
AUTH *ca_auth; /* RPC auth handle */
|
|
|
|
};
|
|
|
|
|
2010-05-08 01:24:18 +00:00
|
|
|
static int nfsv2_procid[NFS_V3NPROCS] = {
|
|
|
|
NFSV2PROC_NULL,
|
|
|
|
NFSV2PROC_GETATTR,
|
|
|
|
NFSV2PROC_SETATTR,
|
|
|
|
NFSV2PROC_LOOKUP,
|
|
|
|
NFSV2PROC_NOOP,
|
|
|
|
NFSV2PROC_READLINK,
|
|
|
|
NFSV2PROC_READ,
|
|
|
|
NFSV2PROC_WRITE,
|
|
|
|
NFSV2PROC_CREATE,
|
|
|
|
NFSV2PROC_MKDIR,
|
|
|
|
NFSV2PROC_SYMLINK,
|
|
|
|
NFSV2PROC_CREATE,
|
|
|
|
NFSV2PROC_REMOVE,
|
|
|
|
NFSV2PROC_RMDIR,
|
|
|
|
NFSV2PROC_RENAME,
|
|
|
|
NFSV2PROC_LINK,
|
|
|
|
NFSV2PROC_READDIR,
|
|
|
|
NFSV2PROC_NOOP,
|
|
|
|
NFSV2PROC_STATFS,
|
|
|
|
NFSV2PROC_NOOP,
|
|
|
|
NFSV2PROC_NOOP,
|
|
|
|
NFSV2PROC_NOOP,
|
|
|
|
};
|
|
|
|
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* Initialize sockets and congestion for a new NFS connection.
|
|
|
|
* We do not free the sockaddr if error.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
|
|
|
|
struct ucred *cred, NFSPROC_T *p, int callback_retry_mult)
|
|
|
|
{
|
|
|
|
int rcvreserve, sndreserve;
|
|
|
|
int pktscale;
|
|
|
|
struct sockaddr *saddr;
|
|
|
|
struct ucred *origcred;
|
|
|
|
CLIENT *client;
|
|
|
|
struct netconfig *nconf;
|
|
|
|
struct socket *so;
|
2011-07-16 08:51:09 +00:00
|
|
|
int one = 1, retries, error = 0;
|
2009-05-04 15:23:58 +00:00
|
|
|
struct thread *td = curthread;
|
2012-12-08 22:52:39 +00:00
|
|
|
SVCXPRT *xprt;
|
2011-12-21 02:45:51 +00:00
|
|
|
struct timeval timo;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We need to establish the socket using the credentials of
|
|
|
|
* the mountpoint. Some parts of this process (such as
|
|
|
|
* sobind() and soconnect()) will use the curent thread's
|
|
|
|
* credential instead of the socket credential. To work
|
|
|
|
* around this, temporarily change the current thread's
|
|
|
|
* credential to that of the mountpoint.
|
|
|
|
*
|
|
|
|
* XXX: It would be better to explicitly pass the correct
|
|
|
|
* credential to sobind() and soconnect().
|
|
|
|
*/
|
|
|
|
origcred = td->td_ucred;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the credential in nr_cred, if not NULL.
|
|
|
|
*/
|
|
|
|
if (nrp->nr_cred != NULL)
|
|
|
|
td->td_ucred = nrp->nr_cred;
|
|
|
|
else
|
|
|
|
td->td_ucred = cred;
|
|
|
|
saddr = nrp->nr_nam;
|
|
|
|
|
|
|
|
if (saddr->sa_family == AF_INET)
|
|
|
|
if (nrp->nr_sotype == SOCK_DGRAM)
|
|
|
|
nconf = getnetconfigent("udp");
|
|
|
|
else
|
|
|
|
nconf = getnetconfigent("tcp");
|
|
|
|
else
|
|
|
|
if (nrp->nr_sotype == SOCK_DGRAM)
|
|
|
|
nconf = getnetconfigent("udp6");
|
|
|
|
else
|
|
|
|
nconf = getnetconfigent("tcp6");
|
|
|
|
|
|
|
|
pktscale = nfs_bufpackets;
|
|
|
|
if (pktscale < 2)
|
|
|
|
pktscale = 2;
|
|
|
|
if (pktscale > 64)
|
|
|
|
pktscale = 64;
|
|
|
|
/*
|
|
|
|
* soreserve() can fail if sb_max is too small, so shrink pktscale
|
|
|
|
* and try again if there is an error.
|
|
|
|
* Print a log message suggesting increasing sb_max.
|
|
|
|
* Creating a socket and doing this is necessary since, if the
|
|
|
|
* reservation sizes are too large and will make soreserve() fail,
|
|
|
|
* the connection will work until a large send is attempted and
|
|
|
|
* then it will loop in the krpc code.
|
|
|
|
*/
|
|
|
|
so = NULL;
|
|
|
|
saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *);
|
|
|
|
error = socreate(saddr->sa_family, &so, nrp->nr_sotype,
|
|
|
|
nrp->nr_soproto, td->td_ucred, td);
|
|
|
|
if (error) {
|
|
|
|
td->td_ucred = origcred;
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
do {
|
2011-04-17 20:01:32 +00:00
|
|
|
if (error != 0 && pktscale > 2)
|
2009-05-04 15:23:58 +00:00
|
|
|
pktscale--;
|
|
|
|
if (nrp->nr_sotype == SOCK_DGRAM) {
|
|
|
|
if (nmp != NULL) {
|
|
|
|
sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
|
|
|
|
pktscale;
|
|
|
|
rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
|
|
|
|
pktscale;
|
|
|
|
} else {
|
|
|
|
sndreserve = rcvreserve = 1024 * pktscale;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (nrp->nr_sotype != SOCK_STREAM)
|
|
|
|
panic("nfscon sotype");
|
|
|
|
if (nmp != NULL) {
|
|
|
|
sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
|
|
|
|
sizeof (u_int32_t)) * pktscale;
|
|
|
|
rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
|
|
|
|
sizeof (u_int32_t)) * pktscale;
|
|
|
|
} else {
|
|
|
|
sndreserve = rcvreserve = 1024 * pktscale;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
error = soreserve(so, sndreserve, rcvreserve);
|
|
|
|
} while (error != 0 && pktscale > 2);
|
|
|
|
soclose(so);
|
|
|
|
if (error) {
|
|
|
|
td->td_ucred = origcred;
|
2011-07-16 08:51:09 +00:00
|
|
|
goto out;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog,
|
|
|
|
nrp->nr_vers, sndreserve, rcvreserve);
|
2014-12-23 00:47:46 +00:00
|
|
|
CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq");
|
2009-05-04 15:23:58 +00:00
|
|
|
if (nmp != NULL) {
|
|
|
|
if ((nmp->nm_flag & NFSMNT_INT))
|
|
|
|
CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one);
|
|
|
|
if ((nmp->nm_flag & NFSMNT_RESVPORT))
|
|
|
|
CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
|
2011-12-21 02:45:51 +00:00
|
|
|
if (NFSHASSOFT(nmp)) {
|
|
|
|
if (nmp->nm_sotype == SOCK_DGRAM)
|
|
|
|
/*
|
|
|
|
* For UDP, the large timeout for a reconnect
|
|
|
|
* will be set to "nm_retry * nm_timeo / 2", so
|
|
|
|
* we only want to do 2 reconnect timeout
|
|
|
|
* retries.
|
|
|
|
*/
|
|
|
|
retries = 2;
|
|
|
|
else
|
|
|
|
retries = nmp->nm_retry;
|
|
|
|
} else
|
2009-05-04 15:23:58 +00:00
|
|
|
retries = INT_MAX;
|
2017-04-21 22:38:26 +00:00
|
|
|
/* cred == NULL for DS connects. */
|
|
|
|
if (NFSHASNFSV4N(nmp) && cred != NULL) {
|
2012-12-08 22:52:39 +00:00
|
|
|
/*
|
|
|
|
* Make sure the nfscbd_pool doesn't get destroyed
|
|
|
|
* while doing this.
|
|
|
|
*/
|
|
|
|
NFSD_LOCK();
|
|
|
|
if (nfs_numnfscbd > 0) {
|
|
|
|
nfs_numnfscbd++;
|
|
|
|
NFSD_UNLOCK();
|
|
|
|
xprt = svc_vc_create_backchannel(nfscbd_pool);
|
|
|
|
CLNT_CONTROL(client, CLSET_BACKCHANNEL, xprt);
|
|
|
|
NFSD_LOCK();
|
|
|
|
nfs_numnfscbd--;
|
|
|
|
if (nfs_numnfscbd == 0)
|
|
|
|
wakeup(&nfs_numnfscbd);
|
|
|
|
}
|
|
|
|
NFSD_UNLOCK();
|
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Three cases:
|
|
|
|
* - Null RPC callback to client
|
|
|
|
* - Non-Null RPC callback to client, wait a little longer
|
|
|
|
* - upcalls to nfsuserd and gssd (clp == NULL)
|
|
|
|
*/
|
|
|
|
if (callback_retry_mult == 0) {
|
|
|
|
retries = NFSV4_UPCALLRETRY;
|
|
|
|
CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
|
|
|
|
} else {
|
|
|
|
retries = NFSV4_CALLBACKRETRY * callback_retry_mult;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
CLNT_CONTROL(client, CLSET_RETRIES, &retries);
|
|
|
|
|
2011-12-21 02:45:51 +00:00
|
|
|
if (nmp != NULL) {
|
|
|
|
/*
|
|
|
|
* For UDP, there are 2 timeouts:
|
|
|
|
* - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer
|
|
|
|
* that does a retransmit of an RPC request using the same
|
|
|
|
* socket and xid. This is what you normally want to do,
|
|
|
|
* since NFS servers depend on "same xid" for their
|
|
|
|
* Duplicate Request Cache.
|
|
|
|
* - timeout specified in CLNT_CALL_MBUF(), which specifies when
|
|
|
|
* retransmits on the same socket should fail and a fresh
|
|
|
|
* socket created. Each of these timeouts counts as one
|
|
|
|
* CLSET_RETRIES as set above.
|
|
|
|
* Set the initial retransmit timeout for UDP. This timeout
|
|
|
|
* doesn't exist for TCP and the following call just fails,
|
|
|
|
* which is ok.
|
|
|
|
*/
|
|
|
|
timo.tv_sec = nmp->nm_timeo / NFS_HZ;
|
|
|
|
timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
|
|
|
|
CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo);
|
|
|
|
}
|
|
|
|
|
2009-05-04 15:23:58 +00:00
|
|
|
mtx_lock(&nrp->nr_mtx);
|
|
|
|
if (nrp->nr_client != NULL) {
|
2013-11-03 23:17:30 +00:00
|
|
|
mtx_unlock(&nrp->nr_mtx);
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* Someone else already connected.
|
|
|
|
*/
|
|
|
|
CLNT_RELEASE(client);
|
|
|
|
} else {
|
|
|
|
nrp->nr_client = client;
|
2013-11-03 23:17:30 +00:00
|
|
|
/*
|
|
|
|
* Protocols that do not require connections may be optionally
|
|
|
|
* left unconnected for servers that reply from a port other
|
|
|
|
* than NFS_PORT.
|
|
|
|
*/
|
|
|
|
if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) {
|
|
|
|
mtx_unlock(&nrp->nr_mtx);
|
|
|
|
CLNT_CONTROL(client, CLSET_CONNECT, &one);
|
|
|
|
} else
|
|
|
|
mtx_unlock(&nrp->nr_mtx);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Restore current thread's credentials. */
|
|
|
|
td->td_ucred = origcred;
|
2011-07-16 08:51:09 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
NFSEXITCODE(error);
|
|
|
|
return (error);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* NFS disconnect. Clean up and unlink.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
newnfs_disconnect(struct nfssockreq *nrp)
|
|
|
|
{
|
|
|
|
CLIENT *client;
|
|
|
|
|
|
|
|
mtx_lock(&nrp->nr_mtx);
|
|
|
|
if (nrp->nr_client != NULL) {
|
|
|
|
client = nrp->nr_client;
|
|
|
|
nrp->nr_client = NULL;
|
|
|
|
mtx_unlock(&nrp->nr_mtx);
|
2011-06-19 22:08:55 +00:00
|
|
|
rpc_gss_secpurge_call(client);
|
2009-05-04 15:23:58 +00:00
|
|
|
CLNT_CLOSE(client);
|
|
|
|
CLNT_RELEASE(client);
|
|
|
|
} else {
|
|
|
|
mtx_unlock(&nrp->nr_mtx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static AUTH *
|
|
|
|
nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal,
|
|
|
|
char *srv_principal, gss_OID mech_oid, struct ucred *cred)
|
|
|
|
{
|
|
|
|
rpc_gss_service_t svc;
|
|
|
|
AUTH *auth;
|
|
|
|
|
|
|
|
switch (secflavour) {
|
|
|
|
case RPCSEC_GSS_KRB5:
|
|
|
|
case RPCSEC_GSS_KRB5I:
|
|
|
|
case RPCSEC_GSS_KRB5P:
|
|
|
|
if (!mech_oid) {
|
2011-06-19 22:08:55 +00:00
|
|
|
if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid))
|
2009-05-04 15:23:58 +00:00
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
if (secflavour == RPCSEC_GSS_KRB5)
|
|
|
|
svc = rpc_gss_svc_none;
|
|
|
|
else if (secflavour == RPCSEC_GSS_KRB5I)
|
|
|
|
svc = rpc_gss_svc_integrity;
|
|
|
|
else
|
|
|
|
svc = rpc_gss_svc_privacy;
|
2013-07-09 01:05:28 +00:00
|
|
|
|
2009-05-23 00:40:17 +00:00
|
|
|
if (clnt_principal == NULL)
|
2011-06-19 22:08:55 +00:00
|
|
|
auth = rpc_gss_secfind_call(nrp->nr_client, cred,
|
2009-05-23 00:40:17 +00:00
|
|
|
srv_principal, mech_oid, svc);
|
2013-07-09 01:05:28 +00:00
|
|
|
else {
|
|
|
|
auth = rpc_gss_seccreate_call(nrp->nr_client, cred,
|
|
|
|
clnt_principal, srv_principal, "kerberosv5",
|
|
|
|
svc, NULL, NULL, NULL);
|
|
|
|
return (auth);
|
|
|
|
}
|
2009-05-24 03:22:49 +00:00
|
|
|
if (auth != NULL)
|
|
|
|
return (auth);
|
|
|
|
/* fallthrough */
|
2009-05-04 15:23:58 +00:00
|
|
|
case AUTH_SYS:
|
|
|
|
default:
|
|
|
|
return (authunix_create(cred));
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Callback from the RPC code to generate up/down notifications.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Per-request state handed to nfs_feedback() through its opaque argument.
 */
struct nfs_feedback_arg {
	/* mount the request belongs to */
	struct nfsmount	*nf_mount;
	/* last tprintf; compared against NFSD_MONOSEC by nfs_feedback() */
	int		nf_lastmsg;
	/* set to TRUE once nfs_feedback() has reported the server down */
	int		nf_tprintfmsg;
	/* thread passed on to nfs_down() and nfs_up() */
	struct thread	*nf_td;
};
|
|
|
|
|
|
|
|
static void
|
|
|
|
nfs_feedback(int type, int proc, void *arg)
|
|
|
|
{
|
|
|
|
struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg;
|
|
|
|
struct nfsmount *nmp = nf->nf_mount;
|
2013-01-25 15:25:24 +00:00
|
|
|
time_t now;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case FEEDBACK_REXMIT2:
|
|
|
|
case FEEDBACK_RECONNECT:
|
2013-01-25 15:25:24 +00:00
|
|
|
now = NFSD_MONOSEC;
|
|
|
|
if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now) {
|
2009-05-04 15:23:58 +00:00
|
|
|
nfs_down(nmp, nf->nf_td,
|
|
|
|
"not responding", 0, NFSSTA_TIMEO);
|
|
|
|
nf->nf_tprintfmsg = TRUE;
|
2013-01-25 15:25:24 +00:00
|
|
|
nf->nf_lastmsg = now;
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case FEEDBACK_OK:
|
|
|
|
nfs_up(nf->nf_mount, nf->nf_td,
|
|
|
|
"is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* newnfs_request - goes something like this
|
|
|
|
* - does the rpc by calling the krpc layer
|
|
|
|
* - break down rpc header and return with nfs reply
|
|
|
|
* nb: always frees up nd_mreq mbuf list
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
|
|
|
|
struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp,
|
|
|
|
struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers,
|
Fix NFSv4.1 client recovery from NFS4ERR_BAD_SESSION errors.
For most NFSv4.1 servers, a NFS4ERR_BAD_SESSION error is a rare failure
that indicates that the server has lost session/open/lock state.
However, recent testing by cperciva@ against the AmazonEFS server found
several problems with client recovery from this due to it generating this
failure frequently.
Briefly, the problems fixed are:
- If all session slots were in use at the time of the failure, some processes
would continue to loop waiting for a slot on the old session forever.
- If an RPC that doesn't use open/lock state failed with NFS4ERR_BAD_SESSION,
it would fail the RPC/syscall instead of initiating recovery and then
looping to retry the RPC.
- If a successful reply to an RPC for an old session wasn't processed
until after a new session was created for a NFS4ERR_BAD_SESSION error,
it would erroneously update the new session and corrupt it.
- The use of the first element of the session list in the nfs mount
structure (which is always the current metadata session) was slightly
racey. With changes for the above problems it became more racey, so all
uses of this head pointer was wrapped with a NFSLOCKMNT()/NFSUNLOCKMNT().
- Although the kernel malloc() usually allocates more bytes than requested
and, as such, this wouldn't have caused problems, the allocation of a
session structure was 1 byte smaller than it should have been.
(Null termination byte for the string not included in byte count.)
There are probably still problems with a pNFS data server that fails
with NFS4ERR_BAD_SESSION, but I have no server that does this to test
against (the AmazonEFS server doesn't do pNFS), so I can't fix these yet.
Although this patch is fairly large, it should only affect the handling
of NFS4ERR_BAD_SESSION error replies from an NFSv4.1 server.
Thanks go to cperciva@ for the extension testing he did to help isolate/fix
these problems.
Reported by: cperciva
Tested by: cperciva
MFC after: 3 months
Differential Revision: https://reviews.freebsd.org/D8745
2016-12-23 23:14:53 +00:00
|
|
|
u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep)
|
2009-05-04 15:23:58 +00:00
|
|
|
{
|
Fix NFSv4.1 client recovery from NFS4ERR_BAD_SESSION errors.
For most NFSv4.1 servers, a NFS4ERR_BAD_SESSION error is a rare failure
that indicates that the server has lost session/open/lock state.
However, recent testing by cperciva@ against the AmazonEFS server found
several problems with client recovery from this due to it generating this
failure frequently.
Briefly, the problems fixed are:
- If all session slots were in use at the time of the failure, some processes
would continue to loop waiting for a slot on the old session forever.
- If an RPC that doesn't use open/lock state failed with NFS4ERR_BAD_SESSION,
it would fail the RPC/syscall instead of initiating recovery and then
looping to retry the RPC.
- If a successful reply to an RPC for an old session wasn't processed
until after a new session was created for a NFS4ERR_BAD_SESSION error,
it would erroneously update the new session and corrupt it.
- The use of the first element of the session list in the nfs mount
structure (which is always the current metadata session) was slightly
racey. With changes for the above problems it became more racey, so all
uses of this head pointer was wrapped with a NFSLOCKMNT()/NFSUNLOCKMNT().
- Although the kernel malloc() usually allocates more bytes than requested
and, as such, this wouldn't have caused problems, the allocation of a
session structure was 1 byte smaller than it should have been.
(Null termination byte for the string not included in byte count.)
There are probably still problems with a pNFS data server that fails
with NFS4ERR_BAD_SESSION, but I have no server that does this to test
against (the AmazonEFS server doesn't do pNFS), so I can't fix these yet.
Although this patch is fairly large, it should only affect the handling
of NFS4ERR_BAD_SESSION error replies from an NFSv4.1 server.
Thanks go to cperciva@ for the extension testing he did to help isolate/fix
these problems.
Reported by: cperciva
Tested by: cperciva
MFC after: 3 months
Differential Revision: https://reviews.freebsd.org/D8745
2016-12-23 23:14:53 +00:00
|
|
|
uint32_t retseq, retval, slotseq, *tl;
|
2009-05-04 15:23:58 +00:00
|
|
|
time_t waituntil;
|
2012-12-08 22:52:39 +00:00
|
|
|
int i = 0, j = 0, opcnt, set_sigset = 0, slot;
|
2009-05-04 15:23:58 +00:00
|
|
|
int trycnt, error = 0, usegssname = 0, secflavour = AUTH_SYS;
|
Fix NFSv4.1 client recovery from NFS4ERR_BAD_SESSION errors.
For most NFSv4.1 servers, a NFS4ERR_BAD_SESSION error is a rare failure
that indicates that the server has lost session/open/lock state.
However, recent testing by cperciva@ against the AmazonEFS server found
several problems with client recovery from this due to it generating this
failure frequently.
Briefly, the problems fixed are:
- If all session slots were in use at the time of the failure, some processes
would continue to loop waiting for a slot on the old session forever.
- If an RPC that doesn't use open/lock state failed with NFS4ERR_BAD_SESSION,
it would fail the RPC/syscall instead of initiating recovery and then
looping to retry the RPC.
- If a successful reply to an RPC for an old session wasn't processed
until after a new session was created for a NFS4ERR_BAD_SESSION error,
it would erroneously update the new session and corrupt it.
- The use of the first element of the session list in the nfs mount
structure (which is always the current metadata session) was slightly
racey. With changes for the above problems it became more racey, so all
uses of this head pointer was wrapped with a NFSLOCKMNT()/NFSUNLOCKMNT().
- Although the kernel malloc() usually allocates more bytes than requested
and, as such, this wouldn't have caused problems, the allocation of a
session structure was 1 byte smaller than it should have been.
(Null termination byte for the string not included in byte count.)
There are probably still problems with a pNFS data server that fails
with NFS4ERR_BAD_SESSION, but I have no server that does this to test
against (the AmazonEFS server doesn't do pNFS), so I can't fix these yet.
Although this patch is fairly large, it should only affect the handling
of NFS4ERR_BAD_SESSION error replies from an NFSv4.1 server.
Thanks go to cperciva@ for the extension testing he did to help isolate/fix
these problems.
Reported by: cperciva
Tested by: cperciva
MFC after: 3 months
Differential Revision: https://reviews.freebsd.org/D8745
2016-12-23 23:14:53 +00:00
|
|
|
int freeslot, maxslot, reterr, slotpos, timeo;
|
2009-05-04 15:23:58 +00:00
|
|
|
u_int16_t procnum;
|
|
|
|
u_int trylater_delay = 1;
|
|
|
|
struct nfs_feedback_arg nf;
|
2013-01-25 15:25:24 +00:00
|
|
|
struct timeval timo;
|
2009-05-04 15:23:58 +00:00
|
|
|
AUTH *auth;
|
|
|
|
struct rpc_callextra ext;
|
|
|
|
enum clnt_stat stat;
|
|
|
|
struct nfsreq *rep = NULL;
|
2013-07-09 01:05:28 +00:00
|
|
|
char *srv_principal = NULL, *clnt_principal = NULL;
|
2009-07-12 17:07:35 +00:00
|
|
|
sigset_t oldset;
|
2012-01-20 00:58:51 +00:00
|
|
|
struct ucred *authcred;
|
Fix NFSv4.1 client recovery from NFS4ERR_BAD_SESSION errors.
For most NFSv4.1 servers, a NFS4ERR_BAD_SESSION error is a rare failure
that indicates that the server has lost session/open/lock state.
However, recent testing by cperciva@ against the AmazonEFS server found
several problems with client recovery from this due to it generating this
failure frequently.
Briefly, the problems fixed are:
- If all session slots were in use at the time of the failure, some processes
would continue to loop waiting for a slot on the old session forever.
- If an RPC that doesn't use open/lock state failed with NFS4ERR_BAD_SESSION,
it would fail the RPC/syscall instead of initiating recovery and then
looping to retry the RPC.
- If a successful reply to an RPC for an old session wasn't processed
until after a new session was created for a NFS4ERR_BAD_SESSION error,
it would erroneously update the new session and corrupt it.
- The use of the first element of the session list in the nfs mount
structure (which is always the current metadata session) was slightly
racey. With changes for the above problems it became more racey, so all
uses of this head pointer was wrapped with a NFSLOCKMNT()/NFSUNLOCKMNT().
- Although the kernel malloc() usually allocates more bytes than requested
and, as such, this wouldn't have caused problems, the allocation of a
session structure was 1 byte smaller than it should have been.
(Null termination byte for the string not included in byte count.)
There are probably still problems with a pNFS data server that fails
with NFS4ERR_BAD_SESSION, but I have no server that does this to test
against (the AmazonEFS server doesn't do pNFS), so I can't fix these yet.
Although this patch is fairly large, it should only affect the handling
of NFS4ERR_BAD_SESSION error replies from an NFSv4.1 server.
Thanks go to cperciva@ for the extension testing he did to help isolate/fix
these problems.
Reported by: cperciva
Tested by: cperciva
MFC after: 3 months
Differential Revision: https://reviews.freebsd.org/D8745
2016-12-23 23:14:53 +00:00
|
|
|
struct nfsclsession *sep;
|
|
|
|
uint8_t sessionid[NFSX_V4SESSIONID];
|
2009-05-04 15:23:58 +00:00
|
|
|
|
Fix NFSv4.1 client recovery from NFS4ERR_BAD_SESSION errors.
For most NFSv4.1 servers, a NFS4ERR_BAD_SESSION error is a rare failure
that indicates that the server has lost session/open/lock state.
However, recent testing by cperciva@ against the AmazonEFS server found
several problems with client recovery from this due to it generating this
failure frequently.
Briefly, the problems fixed are:
- If all session slots were in use at the time of the failure, some processes
would continue to loop waiting for a slot on the old session forever.
- If an RPC that doesn't use open/lock state failed with NFS4ERR_BAD_SESSION,
it would fail the RPC/syscall instead of initiating recovery and then
looping to retry the RPC.
- If a successful reply to an RPC for an old session wasn't processed
until after a new session was created for a NFS4ERR_BAD_SESSION error,
it would erroneously update the new session and corrupt it.
- The use of the first element of the session list in the nfs mount
structure (which is always the current metadata session) was slightly
racy. With changes for the above problems it became more racy, so all
uses of this head pointer were wrapped with a NFSLOCKMNT()/NFSUNLOCKMNT() pair.
- Although the kernel malloc() usually allocates more bytes than requested
and, as such, this wouldn't have caused problems, the allocation of a
session structure was 1 byte smaller than it should have been.
(Null termination byte for the string not included in byte count.)
There are probably still problems with a pNFS data server that fails
with NFS4ERR_BAD_SESSION, but I have no server that does this to test
against (the AmazonEFS server doesn't do pNFS), so I can't fix these yet.
Although this patch is fairly large, it should only affect the handling
of NFS4ERR_BAD_SESSION error replies from an NFSv4.1 server.
Thanks go to cperciva@ for the extensive testing he did to help isolate/fix
these problems.
Reported by: cperciva
Tested by: cperciva
MFC after: 3 months
Differential Revision: https://reviews.freebsd.org/D8745
2016-12-23 23:14:53 +00:00
|
|
|
sep = dssep;
|
2009-05-04 15:23:58 +00:00
|
|
|
if (xidp != NULL)
|
|
|
|
*xidp = 0;
|
|
|
|
/* Reject requests while attempting a forced unmount. */
|
|
|
|
if (nmp != NULL && (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)) {
|
|
|
|
m_freem(nd->nd_mreq);
|
|
|
|
return (ESTALE);
|
|
|
|
}
|
|
|
|
|
2012-01-20 00:58:51 +00:00
|
|
|
/*
|
|
|
|
* Set authcred, which is used to acquire RPC credentials to
|
|
|
|
* the cred argument, by default. The crhold() should not be
|
|
|
|
* necessary, but will ensure that some future code change
|
|
|
|
* doesn't result in the credential being free'd prematurely.
|
|
|
|
*/
|
|
|
|
authcred = crhold(cred);
|
|
|
|
|
2009-07-12 17:07:35 +00:00
|
|
|
/* For client side interruptible mounts, mask off the signals. */
|
|
|
|
if (nmp != NULL && td != NULL && NFSHASINT(nmp)) {
|
|
|
|
newnfs_set_sigmask(td, &oldset);
|
|
|
|
set_sigset = 1;
|
|
|
|
}
|
|
|
|
|
2009-05-24 03:22:49 +00:00
|
|
|
/*
|
|
|
|
* XXX if not already connected call nfs_connect now. Longer
|
|
|
|
* term, change nfs_mount to call nfs_connect unconditionally
|
|
|
|
* and let clnt_reconnect_create handle reconnects.
|
|
|
|
*/
|
|
|
|
if (nrp->nr_client == NULL)
|
|
|
|
newnfs_connect(nmp, nrp, cred, td, 0);
|
|
|
|
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* For a client side mount, nmp is != NULL and clp == NULL. For
|
|
|
|
* server calls (callbacks or upcalls), nmp == NULL.
|
|
|
|
*/
|
|
|
|
if (clp != NULL) {
|
|
|
|
NFSLOCKSTATE();
|
|
|
|
if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) {
|
|
|
|
secflavour = RPCSEC_GSS_KRB5;
|
|
|
|
if (nd->nd_procnum != NFSPROC_NULL) {
|
|
|
|
if (clp->lc_flags & LCL_GSSINTEGRITY)
|
|
|
|
secflavour = RPCSEC_GSS_KRB5I;
|
|
|
|
else if (clp->lc_flags & LCL_GSSPRIVACY)
|
|
|
|
secflavour = RPCSEC_GSS_KRB5P;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
NFSUNLOCKSTATE();
|
|
|
|
} else if (nmp != NULL && NFSHASKERB(nmp) &&
|
|
|
|
nd->nd_procnum != NFSPROC_NULL) {
|
|
|
|
if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0)
|
|
|
|
nd->nd_flag |= ND_USEGSSNAME;
|
2009-05-24 03:22:49 +00:00
|
|
|
if ((nd->nd_flag & ND_USEGSSNAME) != 0) {
|
|
|
|
/*
|
|
|
|
* If there is a client side host based credential,
|
|
|
|
* use that, otherwise use the system uid, if set.
|
2012-01-20 00:58:51 +00:00
|
|
|
* The system uid is in the nmp->nm_sockreq.nr_cred
|
|
|
|
* credentials.
|
2009-05-24 03:22:49 +00:00
|
|
|
*/
|
|
|
|
if (nmp->nm_krbnamelen > 0) {
|
|
|
|
usegssname = 1;
|
2013-07-09 01:05:28 +00:00
|
|
|
clnt_principal = nmp->nm_krbname;
|
2009-05-24 03:22:49 +00:00
|
|
|
} else if (nmp->nm_uid != (uid_t)-1) {
|
2012-01-20 00:58:51 +00:00
|
|
|
KASSERT(nmp->nm_sockreq.nr_cred != NULL,
|
|
|
|
("newnfs_request: NULL nr_cred"));
|
|
|
|
crfree(authcred);
|
|
|
|
authcred = crhold(nmp->nm_sockreq.nr_cred);
|
2009-05-24 03:22:49 +00:00
|
|
|
}
|
|
|
|
} else if (nmp->nm_krbnamelen == 0 &&
|
|
|
|
nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) {
|
|
|
|
/*
|
|
|
|
* If there is no host based principal name and
|
|
|
|
* the system uid is set and this is root, use the
|
|
|
|
* system uid, since root won't have user
|
|
|
|
* credentials in a credentials cache file.
|
2012-01-20 00:58:51 +00:00
|
|
|
* The system uid is in the nmp->nm_sockreq.nr_cred
|
|
|
|
* credentials.
|
2009-05-24 03:22:49 +00:00
|
|
|
*/
|
2012-01-20 00:58:51 +00:00
|
|
|
KASSERT(nmp->nm_sockreq.nr_cred != NULL,
|
|
|
|
("newnfs_request: NULL nr_cred"));
|
|
|
|
crfree(authcred);
|
|
|
|
authcred = crhold(nmp->nm_sockreq.nr_cred);
|
2009-05-24 03:22:49 +00:00
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
if (NFSHASINTEGRITY(nmp))
|
|
|
|
secflavour = RPCSEC_GSS_KRB5I;
|
|
|
|
else if (NFSHASPRIVACY(nmp))
|
|
|
|
secflavour = RPCSEC_GSS_KRB5P;
|
|
|
|
else
|
|
|
|
secflavour = RPCSEC_GSS_KRB5;
|
|
|
|
srv_principal = NFSMNT_SRVKRBNAME(nmp);
|
2011-06-22 19:47:45 +00:00
|
|
|
} else if (nmp != NULL && !NFSHASKERB(nmp) &&
|
|
|
|
nd->nd_procnum != NFSPROC_NULL &&
|
|
|
|
(nd->nd_flag & ND_USEGSSNAME) != 0) {
|
|
|
|
/*
|
|
|
|
* Use the uid that did the mount when the RPC is doing
|
|
|
|
* NFSv4 system operations, as indicated by the
|
|
|
|
* ND_USEGSSNAME flag, for the AUTH_SYS case.
|
2012-01-20 00:58:51 +00:00
|
|
|
* The credentials in nm_sockreq.nr_cred were used for the
|
|
|
|
* mount.
|
2011-06-22 19:47:45 +00:00
|
|
|
*/
|
2012-01-20 00:58:51 +00:00
|
|
|
KASSERT(nmp->nm_sockreq.nr_cred != NULL,
|
|
|
|
("newnfs_request: NULL nr_cred"));
|
|
|
|
crfree(authcred);
|
|
|
|
authcred = crhold(nmp->nm_sockreq.nr_cred);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (nmp != NULL) {
|
|
|
|
bzero(&nf, sizeof(struct nfs_feedback_arg));
|
|
|
|
nf.nf_mount = nmp;
|
|
|
|
nf.nf_td = td;
|
2013-01-25 15:25:24 +00:00
|
|
|
nf.nf_lastmsg = NFSD_MONOSEC -
|
2009-05-04 15:23:58 +00:00
|
|
|
((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay));
|
|
|
|
}
|
|
|
|
|
2009-05-16 03:12:55 +00:00
|
|
|
if (nd->nd_procnum == NFSPROC_NULL)
|
|
|
|
auth = authnone_create();
|
2013-07-09 01:05:28 +00:00
|
|
|
else if (usegssname) {
|
|
|
|
/*
|
|
|
|
* For this case, the authenticator is held in the
|
|
|
|
* nfssockreq structure, so don't release the reference count
|
|
|
|
* held on it. --> Don't AUTH_DESTROY() it in this function.
|
|
|
|
*/
|
|
|
|
if (nrp->nr_auth == NULL)
|
|
|
|
nrp->nr_auth = nfs_getauth(nrp, secflavour,
|
|
|
|
clnt_principal, srv_principal, NULL, authcred);
|
|
|
|
else
|
|
|
|
rpc_gss_refresh_auth_call(nrp->nr_auth);
|
|
|
|
auth = nrp->nr_auth;
|
|
|
|
} else
|
2009-05-04 15:23:58 +00:00
|
|
|
auth = nfs_getauth(nrp, secflavour, NULL,
|
2012-01-20 00:58:51 +00:00
|
|
|
srv_principal, NULL, authcred);
|
|
|
|
crfree(authcred);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (auth == NULL) {
|
|
|
|
m_freem(nd->nd_mreq);
|
2009-07-12 17:07:35 +00:00
|
|
|
if (set_sigset)
|
|
|
|
newnfs_restore_sigmask(td, &oldset);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (EACCES);
|
|
|
|
}
|
|
|
|
bzero(&ext, sizeof(ext));
|
|
|
|
ext.rc_auth = auth;
|
|
|
|
if (nmp != NULL) {
|
|
|
|
ext.rc_feedback = nfs_feedback;
|
|
|
|
ext.rc_feedback_arg = &nf;
|
|
|
|
}
|
|
|
|
|
|
|
|
procnum = nd->nd_procnum;
|
|
|
|
if ((nd->nd_flag & ND_NFSV4) &&
|
2009-05-16 03:12:55 +00:00
|
|
|
nd->nd_procnum != NFSPROC_NULL &&
|
2009-05-04 15:23:58 +00:00
|
|
|
nd->nd_procnum != NFSV4PROC_CBCOMPOUND)
|
|
|
|
procnum = NFSV4PROC_COMPOUND;
|
|
|
|
|
|
|
|
if (nmp != NULL) {
|
2016-08-12 22:44:59 +00:00
|
|
|
NFSINCRGLOBAL(nfsstatsv1.rpcrequests);
|
2010-05-08 01:24:18 +00:00
|
|
|
|
|
|
|
/* Map the procnum to the old NFSv2 one, as required. */
|
|
|
|
if ((nd->nd_flag & ND_NFSV2) != 0) {
|
|
|
|
if (nd->nd_procnum < NFS_V3NPROCS)
|
|
|
|
procnum = nfsv2_procid[nd->nd_procnum];
|
|
|
|
else
|
|
|
|
procnum = NFSV2PROC_NOOP;
|
|
|
|
}
|
|
|
|
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* Now only used for the R_DONTRECOVER case, but until that is
|
|
|
|
* supported within the krpc code, I need to keep a queue of
|
|
|
|
* outstanding RPCs for nfsv4 client requests.
|
|
|
|
*/
|
|
|
|
if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND)
|
|
|
|
MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq),
|
|
|
|
M_NFSDREQ, M_WAITOK);
|
2011-06-18 23:02:53 +00:00
|
|
|
#ifdef KDTRACE_HOOKS
|
|
|
|
if (dtrace_nfscl_nfs234_start_probe != NULL) {
|
|
|
|
uint32_t probe_id;
|
|
|
|
int probe_procnum;
|
|
|
|
|
|
|
|
if (nd->nd_flag & ND_NFSV4) {
|
|
|
|
probe_id =
|
|
|
|
nfscl_nfs4_start_probes[nd->nd_procnum];
|
|
|
|
probe_procnum = nd->nd_procnum;
|
|
|
|
} else if (nd->nd_flag & ND_NFSV3) {
|
|
|
|
probe_id = nfscl_nfs3_start_probes[procnum];
|
|
|
|
probe_procnum = procnum;
|
|
|
|
} else {
|
|
|
|
probe_id =
|
|
|
|
nfscl_nfs2_start_probes[nd->nd_procnum];
|
|
|
|
probe_procnum = procnum;
|
|
|
|
}
|
|
|
|
if (probe_id != 0)
|
|
|
|
(dtrace_nfscl_nfs234_start_probe)
|
|
|
|
(probe_id, vp, nd->nd_mreq, cred,
|
|
|
|
probe_procnum);
|
|
|
|
}
|
|
|
|
#endif
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
trycnt = 0;
|
2012-12-08 22:52:39 +00:00
|
|
|
freeslot = -1; /* Set to slot that needs to be free'd */
|
2009-05-04 15:23:58 +00:00
|
|
|
tryagain:
|
2012-12-08 22:52:39 +00:00
|
|
|
slot = -1; /* Slot that needs a sequence# increment. */
|
2011-12-21 02:45:51 +00:00
|
|
|
/*
|
|
|
|
* This timeout specifies when a new socket should be created,
|
|
|
|
* along with new xid values. For UDP, this should be done
|
|
|
|
* infrequently, since retransmits of RPC requests should normally
|
|
|
|
* use the same xid.
|
|
|
|
*/
|
2009-05-04 15:23:58 +00:00
|
|
|
if (nmp == NULL) {
|
|
|
|
timo.tv_usec = 0;
|
|
|
|
if (clp == NULL)
|
|
|
|
timo.tv_sec = NFSV4_UPCALLTIMEO;
|
|
|
|
else
|
|
|
|
timo.tv_sec = NFSV4_CALLBACKTIMEO;
|
|
|
|
} else {
|
|
|
|
if (nrp->nr_sotype != SOCK_DGRAM) {
|
|
|
|
timo.tv_usec = 0;
|
|
|
|
if ((nmp->nm_flag & NFSMNT_NFSV4))
|
|
|
|
timo.tv_sec = INT_MAX;
|
|
|
|
else
|
|
|
|
timo.tv_sec = NFS_TCPTIMEO;
|
|
|
|
} else {
|
2011-12-21 02:45:51 +00:00
|
|
|
if (NFSHASSOFT(nmp)) {
|
|
|
|
/*
|
|
|
|
* CLSET_RETRIES is set to 2, so this should be
|
|
|
|
* half of the total timeout required.
|
|
|
|
*/
|
|
|
|
timeo = nmp->nm_retry * nmp->nm_timeo / 2;
|
|
|
|
if (timeo < 1)
|
|
|
|
timeo = 1;
|
|
|
|
timo.tv_sec = timeo / NFS_HZ;
|
|
|
|
timo.tv_usec = (timeo % NFS_HZ) * 1000000 /
|
|
|
|
NFS_HZ;
|
|
|
|
} else {
|
|
|
|
/* For UDP hard mounts, use a large value. */
|
|
|
|
timo.tv_sec = NFS_MAXTIMEO / NFS_HZ;
|
|
|
|
timo.tv_usec = 0;
|
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (rep != NULL) {
|
|
|
|
rep->r_flags = 0;
|
|
|
|
rep->r_nmp = nmp;
|
|
|
|
/*
|
|
|
|
* Chain request into list of outstanding requests.
|
|
|
|
*/
|
|
|
|
NFSLOCKREQ();
|
|
|
|
TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain);
|
|
|
|
NFSUNLOCKREQ();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
nd->nd_mrep = NULL;
|
2014-07-01 20:47:16 +00:00
|
|
|
if (clp != NULL && sep != NULL)
|
|
|
|
stat = clnt_bck_call(nrp->nr_client, &ext, procnum,
|
|
|
|
nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt);
|
|
|
|
else
|
|
|
|
stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum,
|
|
|
|
nd->nd_mreq, &nd->nd_mrep, timo);
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
if (rep != NULL) {
|
|
|
|
/*
|
|
|
|
* RPC done, unlink the request.
|
|
|
|
*/
|
|
|
|
NFSLOCKREQ();
|
|
|
|
TAILQ_REMOVE(&nfsd_reqq, rep, r_chain);
|
|
|
|
NFSUNLOCKREQ();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If there was a successful reply and a tprintf msg.
|
|
|
|
* tprintf a response.
|
|
|
|
*/
|
|
|
|
if (stat == RPC_SUCCESS) {
|
|
|
|
error = 0;
|
|
|
|
} else if (stat == RPC_TIMEDOUT) {
|
2016-08-12 22:44:59 +00:00
|
|
|
NFSINCRGLOBAL(nfsstatsv1.rpctimeouts);
|
2009-05-04 15:23:58 +00:00
|
|
|
error = ETIMEDOUT;
|
|
|
|
} else if (stat == RPC_VERSMISMATCH) {
|
2016-08-12 22:44:59 +00:00
|
|
|
NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
|
2009-05-04 15:23:58 +00:00
|
|
|
error = EOPNOTSUPP;
|
|
|
|
} else if (stat == RPC_PROGVERSMISMATCH) {
|
2016-08-12 22:44:59 +00:00
|
|
|
NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
|
2009-05-04 15:23:58 +00:00
|
|
|
error = EPROTONOSUPPORT;
|
2013-01-15 22:08:17 +00:00
|
|
|
} else if (stat == RPC_INTR) {
|
|
|
|
error = EINTR;
|
2009-05-04 15:23:58 +00:00
|
|
|
} else {
|
2016-08-12 22:44:59 +00:00
|
|
|
NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
|
2009-05-04 15:23:58 +00:00
|
|
|
error = EACCES;
|
|
|
|
}
|
|
|
|
if (error) {
|
|
|
|
m_freem(nd->nd_mreq);
|
2013-07-09 01:05:28 +00:00
|
|
|
if (usegssname == 0)
|
|
|
|
AUTH_DESTROY(auth);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (rep != NULL)
|
|
|
|
FREE((caddr_t)rep, M_NFSDREQ);
|
2009-07-12 17:07:35 +00:00
|
|
|
if (set_sigset)
|
|
|
|
newnfs_restore_sigmask(td, &oldset);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n"));
|
|
|
|
|
2009-05-24 19:46:12 +00:00
|
|
|
/*
|
|
|
|
* Search for any mbufs that are not a multiple of 4 bytes long
|
|
|
|
* or with m_data not longword aligned.
|
|
|
|
* These could cause pointer alignment problems, so copy them to
|
|
|
|
* well aligned mbufs.
|
|
|
|
*/
|
Revamp the old NFS server's File Handle Affinity (FHA) code so that
it will work with either the old or new server.
The FHA code keeps a cache of currently active file handles for
NFSv2 and v3 requests, so that read and write requests for the same
file are directed to the same group of threads (reads) or thread
(writes). It does not currently work for NFSv4 requests. They are
more complex, and will take more work to support.
This improves read-ahead performance, especially with ZFS, if the
FHA tuning parameters are configured appropriately. Without the
FHA code, concurrent reads that are part of a sequential read from
a file will be directed to separate NFS threads. This has the
effect of confusing the ZFS zfetch (prefetch) code and makes
sequential reads significantly slower with clients like Linux that
do a lot of prefetching.
The FHA code has also been updated to direct write requests to nearby
file offsets to the same thread in the same way it batches reads,
and the FHA code will now also send writes to multiple threads when
needed.
This improves sequential write performance in ZFS, because writes
to a file are now more ordered. Since NFS writes (generally
less than 64K) are smaller than the typical ZFS record size
(usually 128K), out of order NFS writes to the same block can
trigger a read in ZFS. Sending them down the same thread increases
the odds of their being in order.
In order for multiple write threads per file in the FHA code to be
useful, writes in the NFS server have been changed to use a LK_SHARED
vnode lock, and upgrade that to LK_EXCLUSIVE if the filesystem
doesn't allow multiple writers to a file at once. ZFS is currently
the only filesystem that allows multiple writers to a file, because
it has internal file range locking. This change does not affect the
NFSv4 code.
This improves random write performance to a single file in ZFS, since
we can now have multiple writers inside ZFS at one time.
I have changed the default tuning parameters to a 22 bit (4MB)
window size (from 256K) and unlimited commands per thread as a
result of my benchmarking with ZFS.
The FHA code has been updated to allow configuring the tuning
parameters from loader tunable variables in addition to sysctl
variables. The read offset window calculation has been slightly
modified as well. Instead of having separate bins, each file
handle has a rolling window of bin_shift size. This minimizes
glitches in throughput when shifting from one bin to another.
sys/conf/files:
Add nfs_fha_new.c and nfs_fha_old.c. Compile nfs_fha.c
when either the old or the new NFS server is built.
sys/fs/nfs/nfsport.h,
sys/fs/nfs/nfs_commonport.c:
Bring in changes from Rick Macklem to newnfs_realign that
allow it to operate in blocking (M_WAITOK) or non-blocking
(M_NOWAIT) mode.
sys/fs/nfs/nfs_commonsubs.c,
sys/fs/nfs/nfs_var.h:
Bring in a change from Rick Macklem to allow telling
nfsm_dissect() whether or not to wait for mallocs.
sys/fs/nfs/nfsm_subs.h:
Bring in changes from Rick Macklem to create a new
nfsm_dissect_nonblock() inline function and
NFSM_DISSECT_NONBLOCK() macro.
sys/fs/nfs/nfs_commonkrpc.c,
sys/fs/nfsclient/nfs_clkrpc.c:
Add the malloc wait flag to a newnfs_realign() call.
sys/fs/nfsserver/nfs_nfsdkrpc.c:
Setup the new NFS server's RPC thread pool so that it will
call the FHA code.
Add the malloc flag argument to newnfs_realign().
Unstaticize newnfs_nfsv3_procid[] so that we can use it in
the FHA code.
sys/fs/nfsserver/nfs_nfsdsocket.c:
In nfsrvd_dorpc(), add NFSPROC_WRITE to the list of RPC types
that use the LK_SHARED lock type.
sys/fs/nfsserver/nfs_nfsdport.c:
In nfsd_fhtovp(), if we're starting a write, check to see
whether the underlying filesystem supports shared writes.
If not, upgrade the lock type from LK_SHARED to LK_EXCLUSIVE.
sys/nfsserver/nfs_fha.c:
Remove all code that is specific to the NFS server
implementation. Anything that is server-specific is now
accessed through a callback supplied by that server's FHA
shim in the new softc.
There are now separate sysctls and tunables for the FHA
implementations for the old and new NFS servers. The new
NFS server has its tunables under vfs.nfsd.fha, the old
NFS server's tunables are under vfs.nfsrv.fha as before.
In fha_extract_info(), use callouts for all server-specific
code. Getting file handles and offsets is now done in the
individual server's shim module.
In fha_hash_entry_choose_thread(), change the way we decide
whether two reads are in proximity to each other.
Previously, the calculation was a simple shift operation to
see whether the offsets were in the same power of 2 bucket.
The issue was that there would be a bucket (and therefore
thread) transition, even if the reads were in close
proximity. When there is a thread transition, reads wind
up going somewhat out of order, and ZFS gets confused.
The new calculation simply tries to see whether the offsets
are within 1 << bin_shift of each other. If they are, the
reads will be sent to the same thread.
The effect of this change is that for sequential reads, if
the client doesn't exceed the max_reqs_per_nfsd parameter
and the bin_shift is set to a reasonable value (22, or
4MB works well in my tests), the reads in any sequential
stream will largely be confined to a single thread.
Change fha_assign() so that it takes a softc argument. It
is now called from the individual server's shim code, which
will pass in the softc.
Change fhe_stats_sysctl() so that it takes a softc
parameter. It is now called from the individual server's
shim code. Add the current offset to the list of things
printed out about each active thread.
Change the num_reads and num_writes counters in the
fha_hash_entry structure to 32-bit values, and rename them
num_rw and num_exclusive, respectively, to reflect their
changed usage.
Add an enable sysctl and tunable that allows the user to
disable the FHA code (when vfs.XXX.fha.enable = 0). This
is useful for before/after performance comparisons.
nfs_fha.h:
Move most structure definitions out of nfs_fha.c and into
the header file, so that the individual server shims can
see them.
Change the default bin_shift to 22 (4MB) instead of 18
(256K). Allow unlimited commands per thread.
sys/nfsserver/nfs_fha_old.c,
sys/nfsserver/nfs_fha_old.h,
sys/fs/nfsserver/nfs_fha_new.c,
sys/fs/nfsserver/nfs_fha_new.h:
Add shims for the old and new NFS servers to interface with
the FHA code, and callbacks for the server-specific code.
The shims contain all of the code and definitions that are
specific to the NFS servers.
They setup the server-specific callbacks and set the server
name for the sysctl and loader tunable variables.
sys/nfsserver/nfs_srvkrpc.c:
Configure the RPC code to call fhaold_assign() instead of
fha_assign().
sys/modules/nfsd/Makefile:
Add nfs_fha.c and nfs_fha_new.c.
sys/modules/nfsserver/Makefile:
Add nfs_fha_old.c.
Reviewed by: rmacklem
Sponsored by: Spectra Logic
MFC after: 2 weeks
2013-04-17 21:00:22 +00:00
|
|
|
newnfs_realign(&nd->nd_mrep, M_WAITOK);
|
2009-05-04 15:23:58 +00:00
|
|
|
nd->nd_md = nd->nd_mrep;
|
|
|
|
nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t);
|
|
|
|
nd->nd_repstat = 0;
|
2014-07-01 20:47:16 +00:00
|
|
|
if (nd->nd_procnum != NFSPROC_NULL &&
|
|
|
|
nd->nd_procnum != NFSV4PROC_CBNULL) {
|
2012-12-08 22:52:39 +00:00
|
|
|
/* If sep == NULL, set it to the default in nmp. */
|
|
|
|
if (sep == NULL && nmp != NULL)
|
Fix NFSv4.1 client recovery from NFS4ERR_BAD_SESSION errors.
For most NFSv4.1 servers, a NFS4ERR_BAD_SESSION error is a rare failure
that indicates that the server has lost session/open/lock state.
However, recent testing by cperciva@ against the AmazonEFS server found
several problems with client recovery from this due to it generating this
failure frequently.
Briefly, the problems fixed are:
- If all session slots were in use at the time of the failure, some processes
would continue to loop waiting for a slot on the old session forever.
- If an RPC that doesn't use open/lock state failed with NFS4ERR_BAD_SESSION,
it would fail the RPC/syscall instead of initiating recovery and then
looping to retry the RPC.
- If a successful reply to an RPC for an old session wasn't processed
until after a new session was created for a NFS4ERR_BAD_SESSION error,
it would erroneously update the new session and corrupt it.
- The use of the first element of the session list in the nfs mount
structure (which is always the current metadata session) was slightly
racy. With changes for the above problems it became more racy, so all
uses of this head pointer were wrapped with a NFSLOCKMNT()/NFSUNLOCKMNT() pair.
- Although the kernel malloc() usually allocates more bytes than requested
and, as such, this wouldn't have caused problems, the allocation of a
session structure was 1 byte smaller than it should have been.
(Null termination byte for the string not included in byte count.)
There are probably still problems with a pNFS data server that fails
with NFS4ERR_BAD_SESSION, but I have no server that does this to test
against (the AmazonEFS server doesn't do pNFS), so I can't fix these yet.
Although this patch is fairly large, it should only affect the handling
of NFS4ERR_BAD_SESSION error replies from an NFSv4.1 server.
Thanks go to cperciva@ for the extensive testing he did to help isolate/fix
these problems.
Reported by: cperciva
Tested by: cperciva
MFC after: 3 months
Differential Revision: https://reviews.freebsd.org/D8745
2016-12-23 23:14:53 +00:00
|
|
|
sep = nfsmnt_mdssession(nmp);
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* and now the actual NFS xdr.
|
|
|
|
*/
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl);
|
2012-12-08 22:52:39 +00:00
|
|
|
if (nd->nd_repstat >= 10000)
|
|
|
|
NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum,
|
|
|
|
(int)nd->nd_repstat);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get rid of the tag, return count and SEQUENCE result for
|
|
|
|
* NFSv4.
|
|
|
|
*/
|
|
|
|
if ((nd->nd_flag & ND_NFSV4) != 0) {
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
|
|
|
|
i = fxdr_unsigned(int, *tl);
|
|
|
|
error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
|
|
|
|
if (error)
|
|
|
|
goto nfsmout;
|
|
|
|
NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
|
|
|
|
opcnt = fxdr_unsigned(int, *tl++);
|
|
|
|
i = fxdr_unsigned(int, *tl++);
|
|
|
|
j = fxdr_unsigned(int, *tl);
|
|
|
|
if (j >= 10000)
|
|
|
|
NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j);
|
|
|
|
/*
|
|
|
|
* If the first op is Sequence, free up the slot.
|
|
|
|
*/
|
2014-07-01 20:47:16 +00:00
|
|
|
if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) ||
|
|
|
|
(clp != NULL && i == NFSV4OP_CBSEQUENCE && j != 0))
|
2012-12-08 22:52:39 +00:00
|
|
|
NFSCL_DEBUG(1, "failed seq=%d\n", j);
|
2014-07-01 20:47:16 +00:00
|
|
|
if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) ||
|
|
|
|
(clp != NULL && i == NFSV4OP_CBSEQUENCE && j == 0)
|
|
|
|
) {
|
|
|
|
if (i == NFSV4OP_SEQUENCE)
|
|
|
|
NFSM_DISSECT(tl, uint32_t *,
|
|
|
|
NFSX_V4SESSIONID +
|
|
|
|
5 * NFSX_UNSIGNED);
|
|
|
|
else
|
|
|
|
NFSM_DISSECT(tl, uint32_t *,
|
|
|
|
NFSX_V4SESSIONID +
|
|
|
|
4 * NFSX_UNSIGNED);
|
2012-12-08 22:52:39 +00:00
|
|
|
mtx_lock(&sep->nfsess_mtx);
|
Fix NFSv4.1 client recovery from NFS4ERR_BAD_SESSION errors.
For most NFSv4.1 servers, a NFS4ERR_BAD_SESSION error is a rare failure
that indicates that the server has lost session/open/lock state.
However, recent testing by cperciva@ against the AmazonEFS server found
several problems with client recovery from this due to it generating this
failure frequently.
Briefly, the problems fixed are:
- If all session slots were in use at the time of the failure, some processes
would continue to loop waiting for a slot on the old session forever.
- If an RPC that doesn't use open/lock state failed with NFS4ERR_BAD_SESSION,
it would fail the RPC/syscall instead of initiating recovery and then
looping to retry the RPC.
- If a successful reply to an RPC for an old session wasn't processed
until after a new session was created for a NFS4ERR_BAD_SESSION error,
it would erroneously update the new session and corrupt it.
- The use of the first element of the session list in the nfs mount
structure (which is always the current metadata session) was slightly
racy. With changes for the above problems it became more racy, so all
uses of this head pointer were wrapped with a NFSLOCKMNT()/NFSUNLOCKMNT() pair.
- Although the kernel malloc() usually allocates more bytes than requested
and, as such, this wouldn't have caused problems, the allocation of a
session structure was 1 byte smaller than it should have been.
(Null termination byte for the string not included in byte count.)
There are probably still problems with a pNFS data server that fails
with NFS4ERR_BAD_SESSION, but I have no server that does this to test
against (the AmazonEFS server doesn't do pNFS), so I can't fix these yet.
Although this patch is fairly large, it should only affect the handling
of NFS4ERR_BAD_SESSION error replies from an NFSv4.1 server.
Thanks go to cperciva@ for the extensive testing he did to help isolate/fix
these problems.
Reported by: cperciva
Tested by: cperciva
MFC after: 3 months
Differential Revision: https://reviews.freebsd.org/D8745
2016-12-23 23:14:53 +00:00
|
|
|
if (bcmp(tl, sep->nfsess_sessionid,
|
|
|
|
NFSX_V4SESSIONID) == 0) {
|
|
|
|
tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
|
|
|
|
retseq = fxdr_unsigned(uint32_t, *tl++);
|
|
|
|
slot = fxdr_unsigned(int, *tl++);
|
|
|
|
freeslot = slot;
|
|
|
|
if (retseq != sep->nfsess_slotseq[slot])
|
|
|
|
printf("retseq diff 0x%x\n",
|
|
|
|
retseq);
|
|
|
|
retval = fxdr_unsigned(uint32_t, *++tl);
|
|
|
|
if ((retval + 1) < sep->nfsess_foreslots
|
|
|
|
)
|
|
|
|
sep->nfsess_foreslots = (retval
|
|
|
|
+ 1);
|
|
|
|
else if ((retval + 1) >
|
|
|
|
sep->nfsess_foreslots)
|
|
|
|
sep->nfsess_foreslots = (retval
|
|
|
|
< 64) ? (retval + 1) : 64;
|
|
|
|
}
|
2012-12-08 22:52:39 +00:00
|
|
|
mtx_unlock(&sep->nfsess_mtx);
|
|
|
|
|
|
|
|
/* Grab the op and status for the next one. */
|
|
|
|
if (opcnt > 1) {
|
|
|
|
NFSM_DISSECT(tl, uint32_t *,
|
|
|
|
2 * NFSX_UNSIGNED);
|
|
|
|
i = fxdr_unsigned(int, *tl++);
|
|
|
|
j = fxdr_unsigned(int, *tl);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2009-05-04 15:23:58 +00:00
|
|
|
if (nd->nd_repstat != 0) {
|
Fix NFSv4.1 client recovery from NFS4ERR_BAD_SESSION errors.
For most NFSv4.1 servers, a NFS4ERR_BAD_SESSION error is a rare failure
that indicates that the server has lost session/open/lock state.
However, recent testing by cperciva@ against the AmazonEFS server found
several problems with client recovery from this due to it generating this
failure frequently.
Briefly, the problems fixed are:
- If all session slots were in use at the time of the failure, some processes
would continue to loop waiting for a slot on the old session forever.
- If an RPC that doesn't use open/lock state failed with NFS4ERR_BAD_SESSION,
it would fail the RPC/syscall instead of initiating recovery and then
looping to retry the RPC.
- If a successful reply to an RPC for an old session wasn't processed
until after a new session was created for a NFS4ERR_BAD_SESSION error,
it would erroneously update the new session and corrupt it.
- The use of the first element of the session list in the nfs mount
structure (which is always the current metadata session) was slightly
racy. With changes for the above problems it became more racy, so all
uses of this head pointer were wrapped with a NFSLOCKMNT()/NFSUNLOCKMNT() pair.
- Although the kernel malloc() usually allocates more bytes than requested
and, as such, this wouldn't have caused problems, the allocation of a
session structure was 1 byte smaller than it should have been.
(Null termination byte for the string not included in byte count.)
There are probably still problems with a pNFS data server that fails
with NFS4ERR_BAD_SESSION, but I have no server that does this to test
against (the AmazonEFS server doesn't do pNFS), so I can't fix these yet.
Although this patch is fairly large, it should only affect the handling
of NFS4ERR_BAD_SESSION error replies from an NFSv4.1 server.
Thanks go to cperciva@ for the extensive testing he did to help isolate/fix
these problems.
Reported by: cperciva
Tested by: cperciva
MFC after: 3 months
Differential Revision: https://reviews.freebsd.org/D8745
2016-12-23 23:14:53 +00:00
|
|
|
if (nd->nd_repstat == NFSERR_BADSESSION &&
|
|
|
|
nmp != NULL && dssep == NULL) {
|
|
|
|
/*
|
|
|
|
* If this is a client side MDS RPC, mark
|
|
|
|
* the MDS session defunct and initiate
|
|
|
|
* recovery, as required.
|
|
|
|
* The nfsess_defunct field is protected by
|
|
|
|
* the NFSLOCKMNT()/nm_mtx lock and not the
|
|
|
|
* nfsess_mtx lock to simplify its handling,
|
|
|
|
* for the MDS session. This lock is also
|
|
|
|
* sufficient for nfsess_sessionid, since it
|
|
|
|
* never changes in the structure.
|
|
|
|
*/
|
|
|
|
NFSCL_DEBUG(1, "Got badsession\n");
|
|
|
|
NFSLOCKCLSTATE();
|
|
|
|
NFSLOCKMNT(nmp);
|
|
|
|
sep = NFSMNT_MDSSESSION(nmp);
|
|
|
|
if (bcmp(sep->nfsess_sessionid, nd->nd_sequence,
|
|
|
|
NFSX_V4SESSIONID) == 0) {
|
|
|
|
/* Initiate recovery. */
|
|
|
|
sep->nfsess_defunct = 1;
|
|
|
|
NFSCL_DEBUG(1, "Marked defunct\n");
|
|
|
|
if (nmp->nm_clp != NULL) {
|
|
|
|
nmp->nm_clp->nfsc_flags |=
|
|
|
|
NFSCLFLAGS_RECOVER;
|
|
|
|
wakeup(nmp->nm_clp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
NFSUNLOCKCLSTATE();
|
|
|
|
/*
|
|
|
|
* Sleep for up to 1sec waiting for a new
|
|
|
|
* session.
|
|
|
|
*/
|
|
|
|
mtx_sleep(&nmp->nm_sess, &nmp->nm_mtx, PZERO,
|
|
|
|
"nfsbadsess", hz);
|
|
|
|
/*
|
|
|
|
* Get the session again, in case a new one
|
|
|
|
* has been created during the sleep.
|
|
|
|
*/
|
|
|
|
sep = NFSMNT_MDSSESSION(nmp);
|
|
|
|
NFSUNLOCKMNT(nmp);
|
|
|
|
if ((nd->nd_flag & ND_LOOPBADSESS) != 0) {
|
|
|
|
reterr = nfsv4_sequencelookup(nmp, sep,
|
|
|
|
&slotpos, &maxslot, &slotseq,
|
|
|
|
sessionid);
|
|
|
|
if (reterr == 0) {
|
|
|
|
/* Fill in new session info. */
|
|
|
|
NFSCL_DEBUG(1,
|
|
|
|
"Filling in new sequence\n");
|
|
|
|
tl = nd->nd_sequence;
|
|
|
|
bcopy(sessionid, tl,
|
|
|
|
NFSX_V4SESSIONID);
|
|
|
|
tl += NFSX_V4SESSIONID /
|
|
|
|
NFSX_UNSIGNED;
|
|
|
|
*tl++ = txdr_unsigned(slotseq);
|
|
|
|
*tl++ = txdr_unsigned(slotpos);
|
|
|
|
*tl = txdr_unsigned(maxslot);
|
|
|
|
}
|
|
|
|
if (reterr == NFSERR_BADSESSION ||
|
|
|
|
reterr == 0) {
|
|
|
|
NFSCL_DEBUG(1,
|
|
|
|
"Badsession looping\n");
|
|
|
|
m_freem(nd->nd_mrep);
|
|
|
|
nd->nd_mrep = NULL;
|
|
|
|
goto tryagain;
|
|
|
|
}
|
|
|
|
nd->nd_repstat = reterr;
|
|
|
|
NFSCL_DEBUG(1, "Got err=%d\n", reterr);
|
|
|
|
}
|
|
|
|
}
|
2011-07-16 20:53:27 +00:00
|
|
|
if (((nd->nd_repstat == NFSERR_DELAY ||
|
|
|
|
nd->nd_repstat == NFSERR_GRACE) &&
|
2009-05-04 15:23:58 +00:00
|
|
|
(nd->nd_flag & ND_NFSV4) &&
|
2011-07-16 20:53:27 +00:00
|
|
|
nd->nd_procnum != NFSPROC_DELEGRETURN &&
|
2009-05-04 15:23:58 +00:00
|
|
|
nd->nd_procnum != NFSPROC_SETATTR &&
|
|
|
|
nd->nd_procnum != NFSPROC_READ &&
|
2012-12-08 22:52:39 +00:00
|
|
|
nd->nd_procnum != NFSPROC_READDS &&
|
2009-05-04 15:23:58 +00:00
|
|
|
nd->nd_procnum != NFSPROC_WRITE &&
|
2012-12-08 22:52:39 +00:00
|
|
|
nd->nd_procnum != NFSPROC_WRITEDS &&
|
2009-05-04 15:23:58 +00:00
|
|
|
nd->nd_procnum != NFSPROC_OPEN &&
|
|
|
|
nd->nd_procnum != NFSPROC_CREATE &&
|
|
|
|
nd->nd_procnum != NFSPROC_OPENCONFIRM &&
|
|
|
|
nd->nd_procnum != NFSPROC_OPENDOWNGRADE &&
|
|
|
|
nd->nd_procnum != NFSPROC_CLOSE &&
|
|
|
|
nd->nd_procnum != NFSPROC_LOCK &&
|
|
|
|
nd->nd_procnum != NFSPROC_LOCKU) ||
|
|
|
|
(nd->nd_repstat == NFSERR_DELAY &&
|
|
|
|
(nd->nd_flag & ND_NFSV4) == 0) ||
|
|
|
|
nd->nd_repstat == NFSERR_RESOURCE) {
|
|
|
|
if (trylater_delay > NFS_TRYLATERDEL)
|
|
|
|
trylater_delay = NFS_TRYLATERDEL;
|
|
|
|
waituntil = NFSD_MONOSEC + trylater_delay;
|
|
|
|
while (NFSD_MONOSEC < waituntil)
|
2010-04-24 22:52:14 +00:00
|
|
|
(void) nfs_catnap(PZERO, 0, "nfstry");
|
2009-05-04 15:23:58 +00:00
|
|
|
trylater_delay *= 2;
|
2012-12-08 22:52:39 +00:00
|
|
|
if (slot != -1) {
|
|
|
|
mtx_lock(&sep->nfsess_mtx);
|
|
|
|
sep->nfsess_slotseq[slot]++;
|
|
|
|
*nd->nd_slotseq = txdr_unsigned(
|
|
|
|
sep->nfsess_slotseq[slot]);
|
|
|
|
mtx_unlock(&sep->nfsess_mtx);
|
|
|
|
}
|
2011-06-22 21:10:12 +00:00
|
|
|
m_freem(nd->nd_mrep);
|
|
|
|
nd->nd_mrep = NULL;
|
2009-05-04 15:23:58 +00:00
|
|
|
goto tryagain;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the File Handle was stale, invalidate the
|
|
|
|
* lookup cache, just in case.
|
|
|
|
* (vp != NULL implies a client side call)
|
|
|
|
*/
|
|
|
|
if (nd->nd_repstat == ESTALE && vp != NULL) {
|
|
|
|
cache_purge(vp);
|
|
|
|
if (ncl_call_invalcaches != NULL)
|
|
|
|
(*ncl_call_invalcaches)(vp);
|
|
|
|
}
|
|
|
|
}
|
2012-12-08 22:52:39 +00:00
|
|
|
if ((nd->nd_flag & ND_NFSV4) != 0) {
|
|
|
|
/* Free the slot, as required. */
|
|
|
|
if (freeslot != -1)
|
|
|
|
nfsv4_freeslot(sep, freeslot);
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
2012-12-08 22:52:39 +00:00
|
|
|
* If this op is Putfh, throw its results away.
|
2009-05-04 15:23:58 +00:00
|
|
|
*/
|
2012-12-08 22:52:39 +00:00
|
|
|
if (j >= 10000)
|
|
|
|
NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j);
|
|
|
|
if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) {
|
2009-05-04 15:23:58 +00:00
|
|
|
NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED);
|
|
|
|
i = fxdr_unsigned(int, *tl++);
|
|
|
|
j = fxdr_unsigned(int, *tl);
|
2012-12-08 22:52:39 +00:00
|
|
|
if (j >= 10000)
|
|
|
|
NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i,
|
|
|
|
j);
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* All Compounds that do an Op that must
|
|
|
|
* be in sequence consist of NFSV4OP_PUTFH
|
|
|
|
* followed by one of these. As such, we
|
|
|
|
* can determine if the seqid# should be
|
|
|
|
* incremented, here.
|
|
|
|
*/
|
|
|
|
if ((i == NFSV4OP_OPEN ||
|
|
|
|
i == NFSV4OP_OPENCONFIRM ||
|
|
|
|
i == NFSV4OP_OPENDOWNGRADE ||
|
|
|
|
i == NFSV4OP_CLOSE ||
|
|
|
|
i == NFSV4OP_LOCK ||
|
|
|
|
i == NFSV4OP_LOCKU) &&
|
|
|
|
(j == 0 ||
|
|
|
|
(j != NFSERR_STALECLIENTID &&
|
|
|
|
j != NFSERR_STALESTATEID &&
|
|
|
|
j != NFSERR_BADSTATEID &&
|
|
|
|
j != NFSERR_BADSEQID &&
|
|
|
|
j != NFSERR_BADXDR &&
|
|
|
|
j != NFSERR_RESOURCE &&
|
|
|
|
j != NFSERR_NOFILEHANDLE)))
|
|
|
|
nd->nd_flag |= ND_INCRSEQID;
|
|
|
|
}
|
2012-12-08 22:52:39 +00:00
|
|
|
/*
|
|
|
|
* If this op's status is non-zero, mark
|
|
|
|
* that there is no more data to process.
|
|
|
|
*/
|
|
|
|
if (j)
|
|
|
|
nd->nd_flag |= ND_NOMOREDATA;
|
2009-05-04 15:23:58 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If R_DONTRECOVER is set, replace the stale error
|
|
|
|
* reply, so that recovery isn't initiated.
|
|
|
|
*/
|
|
|
|
if ((nd->nd_repstat == NFSERR_STALECLIENTID ||
|
2012-12-08 22:52:39 +00:00
|
|
|
nd->nd_repstat == NFSERR_BADSESSION ||
|
2009-05-04 15:23:58 +00:00
|
|
|
nd->nd_repstat == NFSERR_STALESTATEID) &&
|
|
|
|
rep != NULL && (rep->r_flags & R_DONTRECOVER))
|
|
|
|
nd->nd_repstat = NFSERR_STALEDONTRECOVER;
|
|
|
|
}
|
|
|
|
}
|
2009-05-16 03:12:55 +00:00
|
|
|
|
2011-06-18 23:02:53 +00:00
|
|
|
#ifdef KDTRACE_HOOKS
|
|
|
|
if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) {
|
|
|
|
uint32_t probe_id;
|
|
|
|
int probe_procnum;
|
|
|
|
|
|
|
|
if (nd->nd_flag & ND_NFSV4) {
|
|
|
|
probe_id = nfscl_nfs4_done_probes[nd->nd_procnum];
|
|
|
|
probe_procnum = nd->nd_procnum;
|
|
|
|
} else if (nd->nd_flag & ND_NFSV3) {
|
|
|
|
probe_id = nfscl_nfs3_done_probes[procnum];
|
|
|
|
probe_procnum = procnum;
|
|
|
|
} else {
|
|
|
|
probe_id = nfscl_nfs2_done_probes[nd->nd_procnum];
|
|
|
|
probe_procnum = procnum;
|
|
|
|
}
|
|
|
|
if (probe_id != 0)
|
|
|
|
(dtrace_nfscl_nfs234_done_probe)(probe_id, vp,
|
|
|
|
nd->nd_mreq, cred, probe_procnum, 0);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2009-05-16 03:12:55 +00:00
|
|
|
m_freem(nd->nd_mreq);
|
2013-07-09 01:05:28 +00:00
|
|
|
if (usegssname == 0)
|
|
|
|
AUTH_DESTROY(auth);
|
2009-05-16 03:12:55 +00:00
|
|
|
if (rep != NULL)
|
|
|
|
FREE((caddr_t)rep, M_NFSDREQ);
|
2009-07-12 17:07:35 +00:00
|
|
|
if (set_sigset)
|
|
|
|
newnfs_restore_sigmask(td, &oldset);
|
2009-05-16 03:12:55 +00:00
|
|
|
return (0);
|
2009-05-04 15:23:58 +00:00
|
|
|
nfsmout:
|
|
|
|
mbuf_freem(nd->nd_mrep);
|
|
|
|
mbuf_freem(nd->nd_mreq);
|
2013-07-09 01:05:28 +00:00
|
|
|
if (usegssname == 0)
|
|
|
|
AUTH_DESTROY(auth);
|
2009-05-04 15:23:58 +00:00
|
|
|
if (rep != NULL)
|
|
|
|
FREE((caddr_t)rep, M_NFSDREQ);
|
2009-07-12 17:07:35 +00:00
|
|
|
if (set_sigset)
|
|
|
|
newnfs_restore_sigmask(td, &oldset);
|
2009-05-04 15:23:58 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
|
|
|
|
* wait for all requests to complete. This is used by forced unmounts
|
|
|
|
* to terminate any outstanding RPCs.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
newnfs_nmcancelreqs(struct nfsmount *nmp)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (nmp->nm_sockreq.nr_client != NULL)
|
|
|
|
CLNT_CLOSE(nmp->nm_sockreq.nr_client);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * The signals that are allowed to interrupt an NFS operation on an intr
 * mount.  Any further signal that should be able to do so must be added
 * to this table.  SIGSTOP and SIGKILL cannot be masked.
 */
int newnfs_sig_set[] = { SIGINT, SIGTERM, SIGHUP, SIGKILL, SIGQUIT };
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check to see if one of the signals in our subset is pending on
|
|
|
|
* the process (in an intr mount).
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
nfs_sig_pending(sigset_t set)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2016-04-19 23:48:27 +00:00
|
|
|
for (i = 0 ; i < nitems(newnfs_sig_set); i++)
|
2009-05-04 15:23:58 +00:00
|
|
|
if (SIGISMEMBER(set, newnfs_sig_set[i]))
|
|
|
|
return (1);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The set/restore sigmask functions are used to (temporarily) overwrite
|
2013-02-06 17:06:51 +00:00
|
|
|
* the thread td_sigmask during an RPC call (for example). These are also
|
2009-05-04 15:23:58 +00:00
|
|
|
* used in other places in the NFS client that might tsleep().
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
newnfs_set_sigmask(struct thread *td, sigset_t *oldset)
|
|
|
|
{
|
|
|
|
sigset_t newset;
|
|
|
|
int i;
|
|
|
|
struct proc *p;
|
|
|
|
|
|
|
|
SIGFILLSET(newset);
|
|
|
|
if (td == NULL)
|
|
|
|
td = curthread; /* XXX */
|
|
|
|
p = td->td_proc;
|
|
|
|
/* Remove the NFS set of signals from newset */
|
|
|
|
PROC_LOCK(p);
|
|
|
|
mtx_lock(&p->p_sigacts->ps_mtx);
|
2016-04-19 23:48:27 +00:00
|
|
|
for (i = 0 ; i < nitems(newnfs_sig_set); i++) {
|
2009-05-04 15:23:58 +00:00
|
|
|
/*
|
|
|
|
* But make sure we leave the ones already masked
|
|
|
|
* by the process, ie. remove the signal from the
|
|
|
|
* temporary signalmask only if it wasn't already
|
|
|
|
* in p_sigmask.
|
|
|
|
*/
|
|
|
|
if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) &&
|
|
|
|
!SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i]))
|
|
|
|
SIGDELSET(newset, newnfs_sig_set[i]);
|
|
|
|
}
|
|
|
|
mtx_unlock(&p->p_sigacts->ps_mtx);
|
2013-02-06 17:06:51 +00:00
|
|
|
kern_sigprocmask(td, SIG_SETMASK, &newset, oldset,
|
|
|
|
SIGPROCMASK_PROC_LOCKED);
|
2009-05-04 15:23:58 +00:00
|
|
|
PROC_UNLOCK(p);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
newnfs_restore_sigmask(struct thread *td, sigset_t *set)
|
|
|
|
{
|
|
|
|
if (td == NULL)
|
|
|
|
td = curthread; /* XXX */
|
|
|
|
kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* NFS wrapper to msleep(), that shoves a new p_sigmask and restores the
|
|
|
|
* old one after msleep() returns.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
newnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo)
|
|
|
|
{
|
|
|
|
sigset_t oldset;
|
|
|
|
int error;
|
|
|
|
struct proc *p;
|
|
|
|
|
|
|
|
if ((priority & PCATCH) == 0)
|
|
|
|
return msleep(ident, mtx, priority, wmesg, timo);
|
|
|
|
if (td == NULL)
|
|
|
|
td = curthread; /* XXX */
|
|
|
|
newnfs_set_sigmask(td, &oldset);
|
|
|
|
error = msleep(ident, mtx, priority, wmesg, timo);
|
|
|
|
newnfs_restore_sigmask(td, &oldset);
|
|
|
|
p = td->td_proc;
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Test for a termination condition pending on the process.
|
|
|
|
* This is used for NFSMNT_INT mounts.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
newnfs_sigintr(struct nfsmount *nmp, struct thread *td)
|
|
|
|
{
|
|
|
|
struct proc *p;
|
|
|
|
sigset_t tmpset;
|
|
|
|
|
|
|
|
/* Terminate all requests while attempting a forced unmount. */
|
|
|
|
if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)
|
|
|
|
return (EIO);
|
|
|
|
if (!(nmp->nm_flag & NFSMNT_INT))
|
|
|
|
return (0);
|
|
|
|
if (td == NULL)
|
|
|
|
return (0);
|
|
|
|
p = td->td_proc;
|
|
|
|
PROC_LOCK(p);
|
|
|
|
tmpset = p->p_siglist;
|
|
|
|
SIGSETOR(tmpset, td->td_siglist);
|
|
|
|
SIGSETNAND(tmpset, td->td_sigmask);
|
|
|
|
mtx_lock(&p->p_sigacts->ps_mtx);
|
|
|
|
SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
|
|
|
|
mtx_unlock(&p->p_sigacts->ps_mtx);
|
|
|
|
if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist))
|
|
|
|
&& nfs_sig_pending(tmpset)) {
|
|
|
|
PROC_UNLOCK(p);
|
|
|
|
return (EINTR);
|
|
|
|
}
|
|
|
|
PROC_UNLOCK(p);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
nfs_msg(struct thread *td, const char *server, const char *msg, int error)
|
|
|
|
{
|
|
|
|
struct proc *p;
|
|
|
|
|
|
|
|
p = td ? td->td_proc : NULL;
|
|
|
|
if (error) {
|
2014-12-23 00:47:46 +00:00
|
|
|
tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n",
|
2009-05-04 15:23:58 +00:00
|
|
|
server, msg, error);
|
|
|
|
} else {
|
2014-12-23 00:47:46 +00:00
|
|
|
tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg);
|
2009-05-04 15:23:58 +00:00
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg,
|
|
|
|
int error, int flags)
|
|
|
|
{
|
|
|
|
if (nmp == NULL)
|
|
|
|
return;
|
|
|
|
mtx_lock(&nmp->nm_mtx);
|
|
|
|
if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
|
|
|
|
nmp->nm_state |= NFSSTA_TIMEO;
|
|
|
|
mtx_unlock(&nmp->nm_mtx);
|
|
|
|
vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
|
|
|
|
VQ_NOTRESP, 0);
|
|
|
|
} else
|
|
|
|
mtx_unlock(&nmp->nm_mtx);
|
|
|
|
mtx_lock(&nmp->nm_mtx);
|
|
|
|
if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
|
|
|
|
nmp->nm_state |= NFSSTA_LOCKTIMEO;
|
|
|
|
mtx_unlock(&nmp->nm_mtx);
|
|
|
|
vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
|
|
|
|
VQ_NOTRESPLOCK, 0);
|
|
|
|
} else
|
|
|
|
mtx_unlock(&nmp->nm_mtx);
|
|
|
|
nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg,
|
|
|
|
int flags, int tprintfmsg)
|
|
|
|
{
|
|
|
|
if (nmp == NULL)
|
|
|
|
return;
|
|
|
|
if (tprintfmsg) {
|
|
|
|
nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
mtx_lock(&nmp->nm_mtx);
|
|
|
|
if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
|
|
|
|
nmp->nm_state &= ~NFSSTA_TIMEO;
|
|
|
|
mtx_unlock(&nmp->nm_mtx);
|
|
|
|
vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
|
|
|
|
VQ_NOTRESP, 1);
|
|
|
|
} else
|
|
|
|
mtx_unlock(&nmp->nm_mtx);
|
|
|
|
|
|
|
|
mtx_lock(&nmp->nm_mtx);
|
|
|
|
if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
|
|
|
|
nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
|
|
|
|
mtx_unlock(&nmp->nm_mtx);
|
|
|
|
vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
|
|
|
|
VQ_NOTRESPLOCK, 1);
|
|
|
|
} else
|
|
|
|
mtx_unlock(&nmp->nm_mtx);
|
|
|
|
}
|
|
|
|
|