2fb03513fc
and server. This replaces the RPC implementation of the NFS client and server with the newer RPC implementation originally developed (actually ported from the userland sunrpc code) to support the NFS Lock Manager. I have tested this code extensively and I believe it is stable and that performance is at least equal to the legacy RPC implementation. The NFS code currently contains support for both the new RPC implementation and the older legacy implementation inherited from the original NFS codebase. The default is to use the new implementation - add the NFS_LEGACYRPC option to fall back to the old code. When I merge this support back to RELENG_7, I will probably change this so that users have to 'opt in' to get the new code. To use RPCSEC_GSS on either client or server, you must build a kernel which includes the KGSSAPI option and the crypto device. On the userland side, you must build at least a new libc, mountd, mount_nfs and gssd. You must install new versions of /etc/rc.d/gssd and /etc/rc.d/nfsd and add 'gssd_enable=YES' to /etc/rc.conf. As long as gssd is running, you should be able to mount an NFS filesystem from a server that requires RPCSEC_GSS authentication. The mount itself can happen without any kerberos credentials but all access to the filesystem will be denied unless the accessing user has a valid ticket file in the standard place (/tmp/krb5cc_<uid>). There is currently no support for situations where the ticket file is in a different place, such as when the user logged in via SSH and has delegated credentials from that login. This restriction is also present in Solaris and Linux. In theory, we could improve this in future, possibly using Brooks Davis' implementation of variant symlinks. Supporting RPCSEC_GSS on a server is nearly as simple. You must create service creds for the server in the form 'nfs/<fqdn>@<REALM>' and install them in /etc/krb5.keytab. The standard heimdal utility ktutil makes this fairly easy. 
After the service creds have been created, you can add a '-sec=krb5' option to /etc/exports and restart both mountd and nfsd. The only other difference an administrator should notice is that nfsd doesn't fork to create service threads any more. In normal operation, there will be two nfsd processes, one in userland waiting for TCP connections and one in the kernel handling requests. The latter process will create as many kthreads as required - these should be visible via 'top -H'. The code has some support for varying the number of service threads according to load but initially at least, nfsd uses a fixed number of threads according to the value supplied to its '-n' option. Sponsored by: Isilon Systems MFC after: 1 month
2312 lines
55 KiB
C
2312 lines
55 KiB
C
/*-
|
|
* Copyright (c) 2008 Isilon Inc http://www.isilon.com/
|
|
* Authors: Doug Rabson <dfr@rabson.org>
|
|
* Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "opt_inet6.h"
|
|
#include "opt_nfs.h"
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/fcntl.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/kthread.h>
|
|
#include <sys/lockf.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/mount.h>
|
|
#if __FreeBSD_version >= 700000
|
|
#include <sys/priv.h>
|
|
#endif
|
|
#include <sys/proc.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/socketvar.h>
|
|
#include <sys/syscall.h>
|
|
#include <sys/sysctl.h>
|
|
#include <sys/sysent.h>
|
|
#include <sys/sysproto.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/taskqueue.h>
|
|
#include <sys/unistd.h>
|
|
#include <sys/vnode.h>
|
|
|
|
#include <nfs/nfsproto.h>
|
|
#include <nfsclient/nfs.h>
|
|
#include <nfsclient/nfsnode.h>
|
|
|
|
#include <nlm/nlm_prot.h>
|
|
#include <nlm/sm_inter.h>
|
|
#include <nlm/nlm.h>
|
|
#include <rpc/rpc_com.h>
|
|
#include <rpc/rpcb_prot.h>
|
|
|
|
MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager");
|
|
|
|
/*
|
|
* If a host is inactive (and holds no locks) for this amount of
|
|
* seconds, we consider it idle and stop tracking it.
|
|
*/
|
|
#define NLM_IDLE_TIMEOUT 30
|
|
|
|
/*
|
|
* We check the host list for idle every few seconds.
|
|
*/
|
|
#define NLM_IDLE_PERIOD 5
|
|
|
|
/*
|
|
* Support for sysctl vfs.nlm.sysid
|
|
*/
|
|
SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW, NULL, "Network Lock Manager");
|
|
SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, CTLFLAG_RW, NULL, "");
|
|
|
|
/*
|
|
* Syscall hooks
|
|
*/
|
|
static int nlm_syscall_offset = SYS_nlm_syscall;
|
|
static struct sysent nlm_syscall_prev_sysent;
|
|
#if __FreeBSD_version < 700000
|
|
static struct sysent nlm_syscall_sysent = {
|
|
(sizeof(struct nlm_syscall_args) / sizeof(register_t)) | SYF_MPSAFE,
|
|
(sy_call_t *) nlm_syscall
|
|
};
|
|
#else
|
|
MAKE_SYSENT(nlm_syscall);
|
|
#endif
|
|
static bool_t nlm_syscall_registered = FALSE;
|
|
|
|
/*
|
|
* Debug level passed in from userland. We also support a sysctl hook
|
|
* so that it can be changed on a live system.
|
|
*/
|
|
static int nlm_debug_level;
|
|
SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, "");
|
|
|
|
/*
|
|
* Grace period handling. The value of nlm_grace_threshold is the
|
|
* value of time_uptime after which we are serving requests normally.
|
|
*/
|
|
static time_t nlm_grace_threshold;
|
|
|
|
/*
|
|
* We check for idle hosts if time_uptime is greater than
|
|
* nlm_next_idle_check,
|
|
*/
|
|
static time_t nlm_next_idle_check;
|
|
|
|
/*
|
|
* A socket to use for RPC - shared by all IPv4 RPC clients.
|
|
*/
|
|
static struct socket *nlm_socket;
|
|
|
|
#ifdef INET6
|
|
|
|
/*
|
|
* A socket to use for RPC - shared by all IPv6 RPC clients.
|
|
*/
|
|
static struct socket *nlm_socket6;
|
|
|
|
#endif
|
|
|
|
/*
|
|
* An RPC client handle that can be used to communicate with the local
|
|
* NSM.
|
|
*/
|
|
static CLIENT *nlm_nsm;
|
|
|
|
/*
|
|
* An AUTH handle for the server's creds.
|
|
*/
|
|
static AUTH *nlm_auth;
|
|
|
|
/*
|
|
* A zero timeval for sending async RPC messages.
|
|
*/
|
|
struct timeval nlm_zero_tv = { 0, 0 };
|
|
|
|
/*
|
|
* The local NSM state number
|
|
*/
|
|
int nlm_nsm_state;
|
|
|
|
|
|
/*
|
|
* A lock to protect the host list and waiting lock list.
|
|
*/
|
|
static struct mtx nlm_global_lock;
|
|
|
|
/*
|
|
* Locks:
|
|
* (l) locked by nh_lock
|
|
* (s) only accessed via server RPC which is single threaded
|
|
* (g) locked by nlm_global_lock
|
|
* (c) const until freeing
|
|
* (a) modified using atomic ops
|
|
*/
|
|
|
|
/*
|
|
* A pending client-side lock request, stored on the nlm_waiting_locks
|
|
* list.
|
|
*/
|
|
struct nlm_waiting_lock {
|
|
TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */
|
|
bool_t nw_waiting; /* (g) */
|
|
nlm4_lock nw_lock; /* (c) */
|
|
union nfsfh nw_fh; /* (c) */
|
|
struct vnode *nw_vp; /* (c) */
|
|
};
|
|
TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock);
|
|
|
|
struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */
|
|
|
|
/*
|
|
* A pending server-side asynchronous lock request, stored on the
|
|
* nh_pending list of the NLM host.
|
|
*/
|
|
struct nlm_async_lock {
|
|
TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */
|
|
struct task af_task; /* (c) async callback details */
|
|
void *af_cookie; /* (l) lock manager cancel token */
|
|
struct vnode *af_vp; /* (l) vnode to lock */
|
|
struct flock af_fl; /* (c) lock details */
|
|
struct nlm_host *af_host; /* (c) host which is locking */
|
|
CLIENT *af_rpc; /* (c) rpc client to send message */
|
|
nlm4_testargs af_granted; /* (c) notification details */
|
|
};
|
|
TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock);
|
|
|
|
/*
 * NLM host: local NSM monitoring state, stored in nh_monstate.
 * Hosts start out NLM_UNMONITORED and are NLM_RECOVERING while a
 * client lock recovery thread is running for them.
 */
enum nlm_host_state {
	NLM_UNMONITORED,
	NLM_MONITORED,
	NLM_MONITOR_FAILED,
	NLM_RECOVERING
};
|
|
|
|
struct nlm_rpc {
|
|
CLIENT *nr_client; /* (l) RPC client handle */
|
|
time_t nr_create_time; /* (l) when client was created */
|
|
};
|
|
|
|
struct nlm_host {
|
|
struct mtx nh_lock;
|
|
volatile u_int nh_refs; /* (a) reference count */
|
|
TAILQ_ENTRY(nlm_host) nh_link; /* (g) global list of hosts */
|
|
char nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */
|
|
uint32_t nh_sysid; /* (c) our allocaed system ID */
|
|
char nh_sysid_string[10]; /* (c) string rep. of sysid */
|
|
struct sockaddr_storage nh_addr; /* (s) remote address of host */
|
|
struct nlm_rpc nh_srvrpc; /* (l) RPC for server replies */
|
|
struct nlm_rpc nh_clntrpc; /* (l) RPC for client requests */
|
|
rpcvers_t nh_vers; /* (s) NLM version of host */
|
|
int nh_state; /* (s) last seen NSM state of host */
|
|
enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */
|
|
time_t nh_idle_timeout; /* (s) Time at which host is idle */
|
|
struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */
|
|
struct nlm_async_lock_list nh_pending; /* (l) pending async locks */
|
|
struct nlm_async_lock_list nh_finished; /* (l) finished async locks */
|
|
};
|
|
TAILQ_HEAD(nlm_host_list, nlm_host);
|
|
|
|
static struct nlm_host_list nlm_hosts; /* (g) */
|
|
static uint32_t nlm_next_sysid = 1; /* (g) */
|
|
|
|
static void nlm_host_unmonitor(struct nlm_host *);
|
|
|
|
/**********************************************************************/
|
|
|
|
/*
|
|
* Initialise NLM globals.
|
|
*/
|
|
static void
|
|
nlm_init(void *dummy)
|
|
{
|
|
int error;
|
|
|
|
mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF);
|
|
TAILQ_INIT(&nlm_waiting_locks);
|
|
TAILQ_INIT(&nlm_hosts);
|
|
|
|
error = syscall_register(&nlm_syscall_offset, &nlm_syscall_sysent,
|
|
&nlm_syscall_prev_sysent);
|
|
if (error)
|
|
printf("Can't register NLM syscall\n");
|
|
else
|
|
nlm_syscall_registered = TRUE;
|
|
}
|
|
SYSINIT(nlm_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_init, NULL);
|
|
|
|
static void
|
|
nlm_uninit(void *dummy)
|
|
{
|
|
|
|
if (nlm_syscall_registered)
|
|
syscall_deregister(&nlm_syscall_offset,
|
|
&nlm_syscall_prev_sysent);
|
|
}
|
|
SYSUNINIT(nlm_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_uninit, NULL);
|
|
|
|
/*
|
|
* Copy a struct netobj.
|
|
*/
|
|
void
|
|
nlm_copy_netobj(struct netobj *dst, struct netobj *src,
|
|
struct malloc_type *type)
|
|
{
|
|
|
|
dst->n_len = src->n_len;
|
|
dst->n_bytes = malloc(src->n_len, type, M_WAITOK);
|
|
memcpy(dst->n_bytes, src->n_bytes, src->n_len);
|
|
}
|
|
|
|
/*
|
|
* Create an RPC client handle for the given (address,prog,vers)
|
|
* triple using UDP.
|
|
*/
|
|
static CLIENT *
|
|
nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers)
|
|
{
|
|
char *wchan = "nlmrcv";
|
|
const char* protofmly;
|
|
struct sockaddr_storage ss;
|
|
struct socket *so;
|
|
CLIENT *rpcb;
|
|
struct timeval timo;
|
|
RPCB parms;
|
|
char *uaddr;
|
|
enum clnt_stat stat = RPC_SUCCESS;
|
|
int rpcvers = RPCBVERS4;
|
|
bool_t do_tcp = FALSE;
|
|
struct portmap mapping;
|
|
u_short port = 0;
|
|
|
|
/*
|
|
* First we need to contact the remote RPCBIND service to find
|
|
* the right port.
|
|
*/
|
|
memcpy(&ss, sa, sa->sa_len);
|
|
switch (ss.ss_family) {
|
|
case AF_INET:
|
|
((struct sockaddr_in *)&ss)->sin_port = htons(111);
|
|
protofmly = "inet";
|
|
so = nlm_socket;
|
|
break;
|
|
|
|
#ifdef INET6
|
|
case AF_INET6:
|
|
((struct sockaddr_in6 *)&ss)->sin6_port = htons(111);
|
|
protofmly = "inet6";
|
|
so = nlm_socket6;
|
|
break;
|
|
#endif
|
|
|
|
default:
|
|
/*
|
|
* Unsupported address family - fail.
|
|
*/
|
|
return (NULL);
|
|
}
|
|
|
|
rpcb = clnt_dg_create(so, (struct sockaddr *)&ss,
|
|
RPCBPROG, rpcvers, 0, 0);
|
|
if (!rpcb)
|
|
return (NULL);
|
|
|
|
try_tcp:
|
|
parms.r_prog = prog;
|
|
parms.r_vers = vers;
|
|
if (do_tcp)
|
|
parms.r_netid = "tcp";
|
|
else
|
|
parms.r_netid = "udp";
|
|
parms.r_addr = "";
|
|
parms.r_owner = "";
|
|
|
|
/*
|
|
* Use the default timeout.
|
|
*/
|
|
timo.tv_sec = 25;
|
|
timo.tv_usec = 0;
|
|
again:
|
|
switch (rpcvers) {
|
|
case RPCBVERS4:
|
|
case RPCBVERS:
|
|
/*
|
|
* Try RPCBIND 4 then 3.
|
|
*/
|
|
uaddr = NULL;
|
|
stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR,
|
|
(xdrproc_t) xdr_rpcb, &parms,
|
|
(xdrproc_t) xdr_wrapstring, &uaddr, timo);
|
|
if (stat == RPC_PROGVERSMISMATCH) {
|
|
if (rpcvers == RPCBVERS4)
|
|
rpcvers = RPCBVERS;
|
|
else if (rpcvers == RPCBVERS)
|
|
rpcvers = PMAPVERS;
|
|
CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers);
|
|
goto again;
|
|
} else if (stat == RPC_SUCCESS) {
|
|
/*
|
|
* We have a reply from the remote RPCBIND - turn it
|
|
* into an appropriate address and make a new client
|
|
* that can talk to the remote NLM.
|
|
*
|
|
* XXX fixup IPv6 scope ID.
|
|
*/
|
|
struct netbuf *a;
|
|
a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr);
|
|
if (!a) {
|
|
CLNT_DESTROY(rpcb);
|
|
return (NULL);
|
|
}
|
|
memcpy(&ss, a->buf, a->len);
|
|
free(a->buf, M_RPC);
|
|
free(a, M_RPC);
|
|
xdr_free((xdrproc_t) xdr_wrapstring, &uaddr);
|
|
}
|
|
break;
|
|
case PMAPVERS:
|
|
/*
|
|
* Try portmap.
|
|
*/
|
|
mapping.pm_prog = parms.r_prog;
|
|
mapping.pm_vers = parms.r_vers;
|
|
mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP;
|
|
mapping.pm_port = 0;
|
|
|
|
stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT,
|
|
(xdrproc_t) xdr_portmap, &mapping,
|
|
(xdrproc_t) xdr_u_short, &port, timo);
|
|
|
|
if (stat == RPC_SUCCESS) {
|
|
switch (ss.ss_family) {
|
|
case AF_INET:
|
|
((struct sockaddr_in *)&ss)->sin_port =
|
|
htons(port);
|
|
break;
|
|
|
|
#ifdef INET6
|
|
case AF_INET6:
|
|
((struct sockaddr_in6 *)&ss)->sin6_port =
|
|
htons(port);
|
|
break;
|
|
#endif
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
panic("invalid rpcvers %d", rpcvers);
|
|
}
|
|
/*
|
|
* We may have a positive response from the portmapper, but the NLM
|
|
* service was not found. Make sure we received a valid port.
|
|
*/
|
|
switch (ss.ss_family) {
|
|
case AF_INET:
|
|
port = ((struct sockaddr_in *)&ss)->sin_port;
|
|
break;
|
|
#ifdef INET6
|
|
case AF_INET6:
|
|
port = ((struct sockaddr_in6 *)&ss)->sin6_port;
|
|
break;
|
|
#endif
|
|
}
|
|
if (stat != RPC_SUCCESS || !port) {
|
|
/*
|
|
* If we were able to talk to rpcbind or portmap, but the udp
|
|
* variant wasn't available, ask about tcp.
|
|
*
|
|
* XXX - We could also check for a TCP portmapper, but
|
|
* if the host is running a portmapper at all, we should be able
|
|
* to hail it over UDP.
|
|
*/
|
|
if (stat == RPC_SUCCESS && !do_tcp) {
|
|
do_tcp = TRUE;
|
|
goto try_tcp;
|
|
}
|
|
|
|
/* Otherwise, bad news. */
|
|
printf("NLM: failed to contact remote rpcbind, "
|
|
"stat = %d, port = %d\n",
|
|
(int) stat, port);
|
|
CLNT_DESTROY(rpcb);
|
|
return (NULL);
|
|
}
|
|
|
|
if (do_tcp) {
|
|
/*
|
|
* Destroy the UDP client we used to speak to rpcbind and
|
|
* recreate as a TCP client.
|
|
*/
|
|
struct netconfig *nconf = NULL;
|
|
|
|
CLNT_DESTROY(rpcb);
|
|
|
|
switch (ss.ss_family) {
|
|
case AF_INET:
|
|
nconf = getnetconfigent("tcp");
|
|
break;
|
|
#ifdef INET6
|
|
case AF_INET6:
|
|
nconf = getnetconfigent("tcp6");
|
|
break;
|
|
#endif
|
|
}
|
|
|
|
rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss,
|
|
prog, vers, 0, 0);
|
|
CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
|
|
rpcb->cl_auth = nlm_auth;
|
|
|
|
} else {
|
|
/*
|
|
* Re-use the client we used to speak to rpcbind.
|
|
*/
|
|
CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss);
|
|
CLNT_CONTROL(rpcb, CLSET_PROG, &prog);
|
|
CLNT_CONTROL(rpcb, CLSET_VERS, &vers);
|
|
CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
|
|
rpcb->cl_auth = nlm_auth;
|
|
}
|
|
|
|
return (rpcb);
|
|
}
|
|
|
|
/*
|
|
* This async callback after when an async lock request has been
|
|
* granted. We notify the host which initiated the request.
|
|
*/
|
|
static void
|
|
nlm_lock_callback(void *arg, int pending)
|
|
{
|
|
struct nlm_async_lock *af = (struct nlm_async_lock *) arg;
|
|
struct rpc_callextra ext;
|
|
|
|
if (nlm_debug_level >= 2)
|
|
printf("NLM: async lock %p for %s (sysid %d) granted\n",
|
|
af, af->af_host->nh_caller_name,
|
|
af->af_host->nh_sysid);
|
|
|
|
/*
|
|
* Send the results back to the host.
|
|
*
|
|
* Note: there is a possible race here with nlm_host_notify
|
|
* destroying the RPC client. To avoid problems, the first
|
|
* thing nlm_host_notify does is to cancel pending async lock
|
|
* requests.
|
|
*/
|
|
memset(&ext, 0, sizeof(ext));
|
|
ext.rc_auth = nlm_auth;
|
|
if (af->af_host->nh_vers == NLM_VERS4) {
|
|
nlm4_granted_msg_4(&af->af_granted,
|
|
NULL, af->af_rpc, &ext, nlm_zero_tv);
|
|
} else {
|
|
/*
|
|
* Back-convert to legacy protocol
|
|
*/
|
|
nlm_testargs granted;
|
|
granted.cookie = af->af_granted.cookie;
|
|
granted.exclusive = af->af_granted.exclusive;
|
|
granted.alock.caller_name =
|
|
af->af_granted.alock.caller_name;
|
|
granted.alock.fh = af->af_granted.alock.fh;
|
|
granted.alock.oh = af->af_granted.alock.oh;
|
|
granted.alock.svid = af->af_granted.alock.svid;
|
|
granted.alock.l_offset =
|
|
af->af_granted.alock.l_offset;
|
|
granted.alock.l_len =
|
|
af->af_granted.alock.l_len;
|
|
|
|
nlm_granted_msg_1(&granted,
|
|
NULL, af->af_rpc, &ext, nlm_zero_tv);
|
|
}
|
|
|
|
/*
|
|
* Move this entry to the nh_finished list. Someone else will
|
|
* free it later - its too hard to do it here safely without
|
|
* racing with cancel.
|
|
*
|
|
* XXX possibly we should have a third "granted sent but not
|
|
* ack'ed" list so that we can re-send the granted message.
|
|
*/
|
|
mtx_lock(&af->af_host->nh_lock);
|
|
TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link);
|
|
TAILQ_INSERT_TAIL(&af->af_host->nh_finished, af, af_link);
|
|
mtx_unlock(&af->af_host->nh_lock);
|
|
}
|
|
|
|
/*
|
|
* Free an async lock request. The request must have been removed from
|
|
* any list.
|
|
*/
|
|
static void
|
|
nlm_free_async_lock(struct nlm_async_lock *af)
|
|
{
|
|
/*
|
|
* Free an async lock.
|
|
*/
|
|
if (af->af_rpc)
|
|
CLNT_RELEASE(af->af_rpc);
|
|
xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted);
|
|
if (af->af_vp)
|
|
vrele(af->af_vp);
|
|
free(af, M_NLM);
|
|
}
|
|
|
|
/*
|
|
* Cancel our async request - this must be called with
|
|
* af->nh_host->nh_lock held. This is slightly complicated by a
|
|
* potential race with our own callback. If we fail to cancel the
|
|
* lock, it must already have been granted - we make sure our async
|
|
* task has completed by calling taskqueue_drain in this case.
|
|
*/
|
|
static int
|
|
nlm_cancel_async_lock(struct nlm_async_lock *af)
|
|
{
|
|
struct nlm_host *host = af->af_host;
|
|
int error;
|
|
|
|
mtx_assert(&host->nh_lock, MA_OWNED);
|
|
|
|
mtx_unlock(&host->nh_lock);
|
|
|
|
error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl,
|
|
F_REMOTE, NULL, &af->af_cookie);
|
|
|
|
if (error) {
|
|
/*
|
|
* We failed to cancel - make sure our callback has
|
|
* completed before we continue.
|
|
*/
|
|
taskqueue_drain(taskqueue_thread, &af->af_task);
|
|
}
|
|
|
|
mtx_lock(&host->nh_lock);
|
|
|
|
if (!error) {
|
|
if (nlm_debug_level >= 2)
|
|
printf("NLM: async lock %p for %s (sysid %d) "
|
|
"cancelled\n",
|
|
af, host->nh_caller_name, host->nh_sysid);
|
|
|
|
/*
|
|
* Remove from the nh_pending list and free now that
|
|
* we are safe from the callback.
|
|
*/
|
|
TAILQ_REMOVE(&host->nh_pending, af, af_link);
|
|
mtx_unlock(&host->nh_lock);
|
|
nlm_free_async_lock(af);
|
|
mtx_lock(&host->nh_lock);
|
|
}
|
|
|
|
return (error);
|
|
}
|
|
|
|
static void
|
|
nlm_free_finished_locks(struct nlm_host *host)
|
|
{
|
|
struct nlm_async_lock *af;
|
|
|
|
mtx_lock(&host->nh_lock);
|
|
while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) {
|
|
TAILQ_REMOVE(&host->nh_finished, af, af_link);
|
|
mtx_unlock(&host->nh_lock);
|
|
nlm_free_async_lock(af);
|
|
mtx_lock(&host->nh_lock);
|
|
}
|
|
mtx_unlock(&host->nh_lock);
|
|
}
|
|
|
|
/*
|
|
* Free resources used by a host. This is called after the reference
|
|
* count has reached zero so it doesn't need to worry about locks.
|
|
*/
|
|
static void
|
|
nlm_host_destroy(struct nlm_host *host)
|
|
{
|
|
|
|
mtx_lock(&nlm_global_lock);
|
|
TAILQ_REMOVE(&nlm_hosts, host, nh_link);
|
|
mtx_unlock(&nlm_global_lock);
|
|
|
|
if (host->nh_srvrpc.nr_client)
|
|
CLNT_RELEASE(host->nh_srvrpc.nr_client);
|
|
if (host->nh_clntrpc.nr_client)
|
|
CLNT_RELEASE(host->nh_clntrpc.nr_client);
|
|
mtx_destroy(&host->nh_lock);
|
|
sysctl_ctx_free(&host->nh_sysctl);
|
|
free(host, M_NLM);
|
|
}
|
|
|
|
#ifdef NFSCLIENT

/*
 * Entry point of the client lock recovery kernel thread.
 *
 * 'arg' is the struct nlm_host to recover; the thread owns one
 * reference to it.  After nlm_client_recovery() returns, the host is
 * marked NLM_MONITORED again, the reference is released and the
 * thread exits.
 */
static void
nlm_client_recovery_start(void *arg)
{
	struct nlm_host *host = arg;

	if (nlm_debug_level >= 1)
		printf("NLM: client lock recovery for %s started\n",
		    host->nh_caller_name);

	nlm_client_recovery(host);

	if (nlm_debug_level >= 1)
		printf("NLM: client lock recovery for %s completed\n",
		    host->nh_caller_name);

	/* Recovery is over; drop the reference the thread was given. */
	host->nh_monstate = NLM_MONITORED;
	nlm_host_release(host);

	kthread_exit();
}

#endif
|
|
|
|
/*
|
|
* This is called when we receive a host state change notification. We
|
|
* unlock any active locks owned by the host. When rpc.lockd is
|
|
* shutting down, this function is called with newstate set to zero
|
|
* which allows us to cancel any pending async locks and clear the
|
|
* locking state.
|
|
*/
|
|
static void
|
|
nlm_host_notify(struct nlm_host *host, int newstate)
|
|
{
|
|
struct nlm_async_lock *af;
|
|
|
|
if (newstate) {
|
|
if (nlm_debug_level >= 1)
|
|
printf("NLM: host %s (sysid %d) rebooted, new "
|
|
"state is %d\n",
|
|
host->nh_caller_name, host->nh_sysid, newstate);
|
|
}
|
|
|
|
/*
|
|
* Cancel any pending async locks for this host.
|
|
*/
|
|
mtx_lock(&host->nh_lock);
|
|
while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) {
|
|
/*
|
|
* nlm_cancel_async_lock will remove the entry from
|
|
* nh_pending and free it.
|
|
*/
|
|
nlm_cancel_async_lock(af);
|
|
}
|
|
mtx_unlock(&host->nh_lock);
|
|
nlm_free_finished_locks(host);
|
|
|
|
/*
|
|
* The host just rebooted - trash its locks.
|
|
*/
|
|
lf_clearremotesys(host->nh_sysid);
|
|
host->nh_state = newstate;
|
|
|
|
#ifdef NFSCLIENT
|
|
/*
|
|
* If we have any remote locks for this host (i.e. it
|
|
* represents a remote NFS server that our local NFS client
|
|
* has locks for), start a recovery thread.
|
|
*/
|
|
if (newstate != 0
|
|
&& host->nh_monstate != NLM_RECOVERING
|
|
&& lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) {
|
|
struct thread *td;
|
|
host->nh_monstate = NLM_RECOVERING;
|
|
refcount_acquire(&host->nh_refs);
|
|
kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0,
|
|
"NFS lock recovery for %s", host->nh_caller_name);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Sysctl handler to count the number of locks for a sysid.
|
|
*/
|
|
static int
|
|
nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
struct nlm_host *host;
|
|
int count;
|
|
|
|
host = oidp->oid_arg1;
|
|
count = lf_countlocks(host->nh_sysid);
|
|
return sysctl_handle_int(oidp, &count, 0, req);
|
|
}
|
|
|
|
/*
|
|
* Sysctl handler to count the number of client locks for a sysid.
|
|
*/
|
|
static int
|
|
nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
struct nlm_host *host;
|
|
int count;
|
|
|
|
host = oidp->oid_arg1;
|
|
count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid);
|
|
return sysctl_handle_int(oidp, &count, 0, req);
|
|
}
|
|
|
|
/*
|
|
* Create a new NLM host.
|
|
*/
|
|
static struct nlm_host *
|
|
nlm_create_host(const char* caller_name)
|
|
{
|
|
struct nlm_host *host;
|
|
struct sysctl_oid *oid;
|
|
|
|
mtx_assert(&nlm_global_lock, MA_OWNED);
|
|
|
|
if (nlm_debug_level >= 1)
|
|
printf("NLM: new host %s (sysid %d)\n",
|
|
caller_name, nlm_next_sysid);
|
|
host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO);
|
|
if (!host)
|
|
return (NULL);
|
|
mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF);
|
|
host->nh_refs = 1;
|
|
strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN);
|
|
host->nh_sysid = nlm_next_sysid++;
|
|
snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string),
|
|
"%d", host->nh_sysid);
|
|
host->nh_vers = 0;
|
|
host->nh_state = 0;
|
|
host->nh_monstate = NLM_UNMONITORED;
|
|
TAILQ_INIT(&host->nh_pending);
|
|
TAILQ_INIT(&host->nh_finished);
|
|
TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link);
|
|
|
|
mtx_unlock(&nlm_global_lock);
|
|
|
|
sysctl_ctx_init(&host->nh_sysctl);
|
|
oid = SYSCTL_ADD_NODE(&host->nh_sysctl,
|
|
SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid),
|
|
OID_AUTO, host->nh_sysid_string, CTLFLAG_RD, NULL, "");
|
|
SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
|
|
"hostname", CTLFLAG_RD, host->nh_caller_name, 0, "");
|
|
SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
|
|
"version", CTLFLAG_RD, &host->nh_vers, 0, "");
|
|
SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
|
|
"monitored", CTLFLAG_RD, &host->nh_monstate, 0, "");
|
|
SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
|
|
"lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0,
|
|
nlm_host_lock_count_sysctl, "I", "");
|
|
SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
|
|
"client_lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0,
|
|
nlm_host_client_lock_count_sysctl, "I", "");
|
|
|
|
mtx_lock(&nlm_global_lock);
|
|
|
|
return (host);
|
|
}
|
|
|
|
/*
 * Compare the address parts of two sockaddrs, ignoring the port.
 *
 * Returns non-zero if both have the same address family and the same
 * IPv4 (or, with INET6, IPv6) address.  Returns zero if the families
 * differ, the addresses differ, or the family is not one we handle.
 */
static int
nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b)
{

	if (a->sa_family != b->sa_family)
		return (0);

	if (a->sa_family == AF_INET) {
		const struct sockaddr_in *lhs = (const struct sockaddr_in *) a;
		const struct sockaddr_in *rhs = (const struct sockaddr_in *) b;

		return (memcmp(&lhs->sin_addr, &rhs->sin_addr,
		    sizeof(lhs->sin_addr)) == 0);
	}
#ifdef INET6
	if (a->sa_family == AF_INET6) {
		const struct sockaddr_in6 *lhs =
		    (const struct sockaddr_in6 *) a;
		const struct sockaddr_in6 *rhs =
		    (const struct sockaddr_in6 *) b;

		return (memcmp(&lhs->sin6_addr, &rhs->sin6_addr,
		    sizeof(lhs->sin6_addr)) == 0);
	}
#endif

	return (0);
}
|
|
|
|
/*
|
|
* Check for idle hosts and stop monitoring them. We could also free
|
|
* the host structure here, possibly after a larger timeout but that
|
|
* would require some care to avoid races with
|
|
* e.g. nlm_host_lock_count_sysctl.
|
|
*/
|
|
static void
|
|
nlm_check_idle(void)
|
|
{
|
|
struct nlm_host *host;
|
|
|
|
mtx_assert(&nlm_global_lock, MA_OWNED);
|
|
|
|
if (time_uptime <= nlm_next_idle_check)
|
|
return;
|
|
|
|
nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;
|
|
|
|
TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
|
|
if (host->nh_monstate == NLM_MONITORED
|
|
&& time_uptime > host->nh_idle_timeout) {
|
|
mtx_unlock(&nlm_global_lock);
|
|
if (lf_countlocks(host->nh_sysid) > 0
|
|
|| lf_countlocks(NLM_SYSID_CLIENT
|
|
+ host->nh_sysid)) {
|
|
host->nh_idle_timeout =
|
|
time_uptime + NLM_IDLE_TIMEOUT;
|
|
mtx_lock(&nlm_global_lock);
|
|
continue;
|
|
}
|
|
nlm_host_unmonitor(host);
|
|
mtx_lock(&nlm_global_lock);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Search for an existing NLM host that matches the given name
|
|
* (typically the caller_name element of an nlm4_lock). If none is
|
|
* found, create a new host. If 'addr' is non-NULL, record the remote
|
|
* address of the host so that we can call it back for async
|
|
* responses. If 'vers' is greater than zero then record the NLM
|
|
* program version to use to communicate with this client.
|
|
*/
|
|
struct nlm_host *
|
|
nlm_find_host_by_name(const char *name, const struct sockaddr *addr,
|
|
rpcvers_t vers)
|
|
{
|
|
struct nlm_host *host;
|
|
|
|
mtx_lock(&nlm_global_lock);
|
|
|
|
/*
|
|
* The remote host is determined by caller_name.
|
|
*/
|
|
TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
|
|
if (!strcmp(host->nh_caller_name, name))
|
|
break;
|
|
}
|
|
|
|
if (!host) {
|
|
host = nlm_create_host(name);
|
|
if (!host) {
|
|
mtx_unlock(&nlm_global_lock);
|
|
return (NULL);
|
|
}
|
|
}
|
|
refcount_acquire(&host->nh_refs);
|
|
|
|
host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;
|
|
|
|
/*
|
|
* If we have an address for the host, record it so that we
|
|
* can send async replies etc.
|
|
*/
|
|
if (addr) {
|
|
|
|
KASSERT(addr->sa_len < sizeof(struct sockaddr_storage),
|
|
("Strange remote transport address length"));
|
|
|
|
/*
|
|
* If we have seen an address before and we currently
|
|
* have an RPC client handle, make sure the address is
|
|
* the same, otherwise discard the client handle.
|
|
*/
|
|
if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) {
|
|
if (!nlm_compare_addr(
|
|
(struct sockaddr *) &host->nh_addr,
|
|
addr)
|
|
|| host->nh_vers != vers) {
|
|
CLIENT *client;
|
|
mtx_lock(&host->nh_lock);
|
|
client = host->nh_srvrpc.nr_client;
|
|
host->nh_srvrpc.nr_client = NULL;
|
|
mtx_unlock(&host->nh_lock);
|
|
if (client) {
|
|
CLNT_RELEASE(client);
|
|
}
|
|
}
|
|
}
|
|
memcpy(&host->nh_addr, addr, addr->sa_len);
|
|
host->nh_vers = vers;
|
|
}
|
|
|
|
nlm_check_idle();
|
|
|
|
mtx_unlock(&nlm_global_lock);
|
|
|
|
return (host);
|
|
}
|
|
|
|
/*
|
|
* Search for an existing NLM host that matches the given remote
|
|
* address. If none is found, create a new host with the requested
|
|
* address and remember 'vers' as the NLM protocol version to use for
|
|
* that host.
|
|
*/
|
|
struct nlm_host *
|
|
nlm_find_host_by_addr(const struct sockaddr *addr, int vers)
|
|
{
|
|
/*
|
|
* Fake up a name using inet_ntop. This buffer is
|
|
* large enough for an IPv6 address.
|
|
*/
|
|
char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"];
|
|
struct nlm_host *host;
|
|
|
|
switch (addr->sa_family) {
|
|
case AF_INET:
|
|
__rpc_inet_ntop(AF_INET,
|
|
&((const struct sockaddr_in *) addr)->sin_addr,
|
|
tmp, sizeof tmp);
|
|
break;
|
|
#ifdef INET6
|
|
case AF_INET6:
|
|
__rpc_inet_ntop(AF_INET6,
|
|
&((const struct sockaddr_in6 *) addr)->sin6_addr,
|
|
tmp, sizeof tmp);
|
|
break;
|
|
#endif
|
|
default:
|
|
strcmp(tmp, "<unknown>");
|
|
}
|
|
|
|
|
|
mtx_lock(&nlm_global_lock);
|
|
|
|
/*
|
|
* The remote host is determined by caller_name.
|
|
*/
|
|
TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
|
|
if (nlm_compare_addr(addr,
|
|
(const struct sockaddr *) &host->nh_addr))
|
|
break;
|
|
}
|
|
|
|
if (!host) {
|
|
host = nlm_create_host(tmp);
|
|
if (!host) {
|
|
mtx_unlock(&nlm_global_lock);
|
|
return (NULL);
|
|
}
|
|
memcpy(&host->nh_addr, addr, addr->sa_len);
|
|
host->nh_vers = vers;
|
|
}
|
|
refcount_acquire(&host->nh_refs);
|
|
|
|
host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;
|
|
|
|
nlm_check_idle();
|
|
|
|
mtx_unlock(&nlm_global_lock);
|
|
|
|
return (host);
|
|
}
|
|
|
|
/*
|
|
* Find the NLM host that matches the value of 'sysid'. If none
|
|
* exists, return NULL.
|
|
*/
|
|
static struct nlm_host *
|
|
nlm_find_host_by_sysid(int sysid)
|
|
{
|
|
struct nlm_host *host;
|
|
|
|
TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
|
|
if (host->nh_sysid == sysid) {
|
|
refcount_acquire(&host->nh_refs);
|
|
return (host);
|
|
}
|
|
}
|
|
|
|
return (NULL);
|
|
}
|
|
|
|
void nlm_host_release(struct nlm_host *host)
|
|
{
|
|
if (refcount_release(&host->nh_refs)) {
|
|
/*
|
|
* Free the host
|
|
*/
|
|
nlm_host_destroy(host);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Unregister this NLM host with the local NSM due to idleness.
|
|
*/
|
|
static void
|
|
nlm_host_unmonitor(struct nlm_host *host)
|
|
{
|
|
mon_id smmonid;
|
|
sm_stat_res smstat;
|
|
struct timeval timo;
|
|
enum clnt_stat stat;
|
|
|
|
if (nlm_debug_level >= 1)
|
|
printf("NLM: unmonitoring %s (sysid %d)\n",
|
|
host->nh_caller_name, host->nh_sysid);
|
|
|
|
/*
|
|
* We put our assigned system ID value in the priv field to
|
|
* make it simpler to find the host if we are notified of a
|
|
* host restart.
|
|
*/
|
|
smmonid.mon_name = host->nh_caller_name;
|
|
smmonid.my_id.my_name = "localhost";
|
|
smmonid.my_id.my_prog = NLM_PROG;
|
|
smmonid.my_id.my_vers = NLM_SM;
|
|
smmonid.my_id.my_proc = NLM_SM_NOTIFY;
|
|
|
|
timo.tv_sec = 25;
|
|
timo.tv_usec = 0;
|
|
stat = CLNT_CALL(nlm_nsm, SM_UNMON,
|
|
(xdrproc_t) xdr_mon, &smmonid,
|
|
(xdrproc_t) xdr_sm_stat, &smstat, timo);
|
|
|
|
if (stat != RPC_SUCCESS) {
|
|
printf("Failed to contact local NSM - rpc error %d\n", stat);
|
|
return;
|
|
}
|
|
if (smstat.res_stat == stat_fail) {
|
|
printf("Local NSM refuses to unmonitor %s\n",
|
|
host->nh_caller_name);
|
|
return;
|
|
}
|
|
|
|
host->nh_monstate = NLM_UNMONITORED;
|
|
}
|
|
|
|
/*
|
|
* Register this NLM host with the local NSM so that we can be
|
|
* notified if it reboots.
|
|
*/
|
|
void
|
|
nlm_host_monitor(struct nlm_host *host, int state)
|
|
{
|
|
mon smmon;
|
|
sm_stat_res smstat;
|
|
struct timeval timo;
|
|
enum clnt_stat stat;
|
|
|
|
if (state && !host->nh_state) {
|
|
/*
|
|
* This is the first time we have seen an NSM state
|
|
* value for this host. We record it here to help
|
|
* detect host reboots.
|
|
*/
|
|
host->nh_state = state;
|
|
if (nlm_debug_level >= 1)
|
|
printf("NLM: host %s (sysid %d) has NSM state %d\n",
|
|
host->nh_caller_name, host->nh_sysid, state);
|
|
}
|
|
|
|
mtx_lock(&host->nh_lock);
|
|
if (host->nh_monstate != NLM_UNMONITORED) {
|
|
mtx_unlock(&host->nh_lock);
|
|
return;
|
|
}
|
|
host->nh_monstate = NLM_MONITORED;
|
|
mtx_unlock(&host->nh_lock);
|
|
|
|
if (nlm_debug_level >= 1)
|
|
printf("NLM: monitoring %s (sysid %d)\n",
|
|
host->nh_caller_name, host->nh_sysid);
|
|
|
|
/*
|
|
* We put our assigned system ID value in the priv field to
|
|
* make it simpler to find the host if we are notified of a
|
|
* host restart.
|
|
*/
|
|
smmon.mon_id.mon_name = host->nh_caller_name;
|
|
smmon.mon_id.my_id.my_name = "localhost";
|
|
smmon.mon_id.my_id.my_prog = NLM_PROG;
|
|
smmon.mon_id.my_id.my_vers = NLM_SM;
|
|
smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY;
|
|
memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid));
|
|
|
|
timo.tv_sec = 25;
|
|
timo.tv_usec = 0;
|
|
stat = CLNT_CALL(nlm_nsm, SM_MON,
|
|
(xdrproc_t) xdr_mon, &smmon,
|
|
(xdrproc_t) xdr_sm_stat, &smstat, timo);
|
|
|
|
if (stat != RPC_SUCCESS) {
|
|
printf("Failed to contact local NSM - rpc error %d\n", stat);
|
|
return;
|
|
}
|
|
if (smstat.res_stat == stat_fail) {
|
|
printf("Local NSM refuses to monitor %s\n",
|
|
host->nh_caller_name);
|
|
mtx_lock(&host->nh_lock);
|
|
host->nh_monstate = NLM_MONITOR_FAILED;
|
|
mtx_unlock(&host->nh_lock);
|
|
return;
|
|
}
|
|
|
|
host->nh_monstate = NLM_MONITORED;
|
|
}
|
|
|
|
/*
|
|
* Return an RPC client handle that can be used to talk to the NLM
|
|
* running on the given host.
|
|
*/
|
|
CLIENT *
|
|
nlm_host_get_rpc(struct nlm_host *host, bool_t isserver)
|
|
{
|
|
struct nlm_rpc *rpc;
|
|
CLIENT *client;
|
|
|
|
mtx_lock(&host->nh_lock);
|
|
|
|
if (isserver)
|
|
rpc = &host->nh_srvrpc;
|
|
else
|
|
rpc = &host->nh_clntrpc;
|
|
|
|
/*
|
|
* We can't hold onto RPC handles for too long - the async
|
|
* call/reply protocol used by some NLM clients makes it hard
|
|
* to tell when they change port numbers (e.g. after a
|
|
* reboot). Note that if a client reboots while it isn't
|
|
* holding any locks, it won't bother to notify us. We
|
|
* expire the RPC handles after two minutes.
|
|
*/
|
|
if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) {
|
|
client = rpc->nr_client;
|
|
rpc->nr_client = NULL;
|
|
mtx_unlock(&host->nh_lock);
|
|
CLNT_RELEASE(client);
|
|
mtx_lock(&host->nh_lock);
|
|
}
|
|
|
|
if (!rpc->nr_client) {
|
|
mtx_unlock(&host->nh_lock);
|
|
client = nlm_get_rpc((struct sockaddr *)&host->nh_addr,
|
|
NLM_PROG, host->nh_vers);
|
|
mtx_lock(&host->nh_lock);
|
|
|
|
if (client) {
|
|
if (rpc->nr_client) {
|
|
mtx_unlock(&host->nh_lock);
|
|
CLNT_DESTROY(client);
|
|
mtx_lock(&host->nh_lock);
|
|
} else {
|
|
rpc->nr_client = client;
|
|
rpc->nr_create_time = time_uptime;
|
|
}
|
|
}
|
|
}
|
|
|
|
client = rpc->nr_client;
|
|
if (client)
|
|
CLNT_ACQUIRE(client);
|
|
mtx_unlock(&host->nh_lock);
|
|
|
|
return (client);
|
|
|
|
}
|
|
|
|
int nlm_host_get_sysid(struct nlm_host *host)
|
|
{
|
|
|
|
return (host->nh_sysid);
|
|
}
|
|
|
|
int
|
|
nlm_host_get_state(struct nlm_host *host)
|
|
{
|
|
|
|
return (host->nh_state);
|
|
}
|
|
|
|
void *
|
|
nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp)
|
|
{
|
|
struct nlm_waiting_lock *nw;
|
|
|
|
nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK);
|
|
nw->nw_lock = *lock;
|
|
memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes,
|
|
nw->nw_lock.fh.n_len);
|
|
nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes;
|
|
nw->nw_waiting = TRUE;
|
|
nw->nw_vp = vp;
|
|
mtx_lock(&nlm_global_lock);
|
|
TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link);
|
|
mtx_unlock(&nlm_global_lock);
|
|
|
|
return nw;
|
|
}
|
|
|
|
void
|
|
nlm_deregister_wait_lock(void *handle)
|
|
{
|
|
struct nlm_waiting_lock *nw = handle;
|
|
|
|
mtx_lock(&nlm_global_lock);
|
|
TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link);
|
|
mtx_unlock(&nlm_global_lock);
|
|
|
|
free(nw, M_NLM);
|
|
}
|
|
|
|
int
|
|
nlm_wait_lock(void *handle, int timo)
|
|
{
|
|
struct nlm_waiting_lock *nw = handle;
|
|
int error;
|
|
|
|
/*
|
|
* If the granted message arrived before we got here,
|
|
* nw->nw_waiting will be FALSE - in that case, don't sleep.
|
|
*/
|
|
mtx_lock(&nlm_global_lock);
|
|
error = 0;
|
|
if (nw->nw_waiting)
|
|
error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo);
|
|
TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link);
|
|
if (error) {
|
|
/*
|
|
* The granted message may arrive after the
|
|
* interrupt/timeout but before we manage to lock the
|
|
* mutex. Detect this by examining nw_lock.
|
|
*/
|
|
if (!nw->nw_waiting)
|
|
error = 0;
|
|
} else {
|
|
/*
|
|
* If nlm_cancel_wait is called, then error will be
|
|
* zero but nw_waiting will still be TRUE. We
|
|
* translate this into EINTR.
|
|
*/
|
|
if (nw->nw_waiting)
|
|
error = EINTR;
|
|
}
|
|
mtx_unlock(&nlm_global_lock);
|
|
|
|
free(nw, M_NLM);
|
|
|
|
return (error);
|
|
}
|
|
|
|
void
|
|
nlm_cancel_wait(struct vnode *vp)
|
|
{
|
|
struct nlm_waiting_lock *nw;
|
|
|
|
mtx_lock(&nlm_global_lock);
|
|
TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
|
|
if (nw->nw_vp == vp) {
|
|
wakeup(nw);
|
|
}
|
|
}
|
|
mtx_unlock(&nlm_global_lock);
|
|
}
|
|
|
|
|
|
/**********************************************************************/
|
|
|
|
/*
|
|
* Syscall interface with userland.
|
|
*/
|
|
|
|
/*
 * RPC dispatch routines, defined outside this file. They are entered
 * in the dispatcher table of nlm_register_services().
 */
void	nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp);
void	nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp);
void	nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp);
void	nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp);
|
|
|
|
static int
|
|
nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs)
|
|
{
|
|
static rpcvers_t versions[] = {
|
|
NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4
|
|
};
|
|
static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = {
|
|
nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4
|
|
};
|
|
static const int version_count = sizeof(versions) / sizeof(versions[0]);
|
|
|
|
SVCXPRT **xprts;
|
|
char netid[16];
|
|
char uaddr[128];
|
|
struct netconfig *nconf;
|
|
int i, j, error;
|
|
|
|
if (!addr_count) {
|
|
printf("NLM: no service addresses given - can't start server");
|
|
return (EINVAL);
|
|
}
|
|
|
|
xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK);
|
|
for (i = 0; i < version_count; i++) {
|
|
for (j = 0; j < addr_count; j++) {
|
|
/*
|
|
* Create transports for the first version and
|
|
* then just register everything else to the
|
|
* same transports.
|
|
*/
|
|
if (i == 0) {
|
|
char *up;
|
|
|
|
error = copyin(&addrs[2*j], &up,
|
|
sizeof(char*));
|
|
if (error)
|
|
goto out;
|
|
error = copyinstr(up, netid, sizeof(netid),
|
|
NULL);
|
|
if (error)
|
|
goto out;
|
|
error = copyin(&addrs[2*j+1], &up,
|
|
sizeof(char*));
|
|
if (error)
|
|
goto out;
|
|
error = copyinstr(up, uaddr, sizeof(uaddr),
|
|
NULL);
|
|
if (error)
|
|
goto out;
|
|
nconf = getnetconfigent(netid);
|
|
if (!nconf) {
|
|
printf("Can't lookup netid %s\n",
|
|
netid);
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
xprts[j] = svc_tp_create(pool, dispatchers[i],
|
|
NLM_PROG, versions[i], uaddr, nconf);
|
|
if (!xprts[j]) {
|
|
printf("NLM: unable to create "
|
|
"(NLM_PROG, %d).\n", versions[i]);
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
freenetconfigent(nconf);
|
|
} else {
|
|
nconf = getnetconfigent(xprts[j]->xp_netid);
|
|
rpcb_unset(NLM_PROG, versions[i], nconf);
|
|
if (!svc_reg(xprts[j], NLM_PROG, versions[i],
|
|
dispatchers[i], nconf)) {
|
|
printf("NLM: can't register "
|
|
"(NLM_PROG, %d)\n", versions[i]);
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
error = 0;
|
|
out:
|
|
free(xprts, M_NLM);
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* Main server entry point. Contacts the local NSM to get its current
|
|
* state and send SM_UNMON_ALL. Registers the NLM services and then
|
|
* services requests. Does not return until the server is interrupted
|
|
* by a signal.
|
|
*/
|
|
static int
|
|
nlm_server_main(int addr_count, char **addrs)
|
|
{
|
|
struct thread *td = curthread;
|
|
int error;
|
|
SVCPOOL *pool = NULL;
|
|
struct sockopt opt;
|
|
int portlow;
|
|
#ifdef INET6
|
|
struct sockaddr_in6 sin6;
|
|
#endif
|
|
struct sockaddr_in sin;
|
|
my_id id;
|
|
sm_stat smstat;
|
|
struct timeval timo;
|
|
enum clnt_stat stat;
|
|
struct nlm_host *host, *nhost;
|
|
struct nlm_waiting_lock *nw;
|
|
#ifdef NFSCLIENT
|
|
vop_advlock_t *old_nfs_advlock;
|
|
vop_reclaim_t *old_nfs_reclaim;
|
|
#endif
|
|
int v4_used;
|
|
#ifdef INET6
|
|
int v6_used;
|
|
#endif
|
|
|
|
if (nlm_socket) {
|
|
printf("NLM: can't start server - it appears to be running already\n");
|
|
return (EPERM);
|
|
}
|
|
|
|
memset(&opt, 0, sizeof(opt));
|
|
|
|
nlm_socket = NULL;
|
|
error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
|
|
td->td_ucred, td);
|
|
if (error) {
|
|
printf("NLM: can't create IPv4 socket - error %d\n", error);
|
|
return (error);
|
|
}
|
|
opt.sopt_dir = SOPT_SET;
|
|
opt.sopt_level = IPPROTO_IP;
|
|
opt.sopt_name = IP_PORTRANGE;
|
|
portlow = IP_PORTRANGE_LOW;
|
|
opt.sopt_val = &portlow;
|
|
opt.sopt_valsize = sizeof(portlow);
|
|
sosetopt(nlm_socket, &opt);
|
|
|
|
#ifdef INET6
|
|
nlm_socket6 = NULL;
|
|
error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
|
|
td->td_ucred, td);
|
|
if (error) {
|
|
printf("NLM: can't create IPv6 socket - error %d\n", error);
|
|
goto out;
|
|
return (error);
|
|
}
|
|
opt.sopt_dir = SOPT_SET;
|
|
opt.sopt_level = IPPROTO_IPV6;
|
|
opt.sopt_name = IPV6_PORTRANGE;
|
|
portlow = IPV6_PORTRANGE_LOW;
|
|
opt.sopt_val = &portlow;
|
|
opt.sopt_valsize = sizeof(portlow);
|
|
sosetopt(nlm_socket6, &opt);
|
|
#endif
|
|
|
|
nlm_auth = authunix_create(curthread->td_ucred);
|
|
|
|
#ifdef INET6
|
|
memset(&sin6, 0, sizeof(sin6));
|
|
sin6.sin6_len = sizeof(sin6);
|
|
sin6.sin6_family = AF_INET6;
|
|
sin6.sin6_addr = in6addr_loopback;
|
|
nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS);
|
|
if (!nlm_nsm) {
|
|
#endif
|
|
memset(&sin, 0, sizeof(sin));
|
|
sin.sin_len = sizeof(sin);
|
|
sin.sin_family = AF_INET;
|
|
sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
|
|
nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG,
|
|
SM_VERS);
|
|
#ifdef INET6
|
|
}
|
|
#endif
|
|
|
|
if (!nlm_nsm) {
|
|
printf("Can't start NLM - unable to contact NSM\n");
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
pool = svcpool_create("NLM", NULL);
|
|
|
|
error = nlm_register_services(pool, addr_count, addrs);
|
|
if (error)
|
|
goto out;
|
|
|
|
memset(&id, 0, sizeof(id));
|
|
id.my_name = "NFS NLM";
|
|
|
|
timo.tv_sec = 25;
|
|
timo.tv_usec = 0;
|
|
stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL,
|
|
(xdrproc_t) xdr_my_id, &id,
|
|
(xdrproc_t) xdr_sm_stat, &smstat, timo);
|
|
|
|
if (stat != RPC_SUCCESS) {
|
|
struct rpc_err err;
|
|
|
|
CLNT_GETERR(nlm_nsm, &err);
|
|
printf("NLM: unexpected error contacting NSM, stat=%d, errno=%d\n",
|
|
stat, err.re_errno);
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
if (nlm_debug_level >= 1)
|
|
printf("NLM: local NSM state is %d\n", smstat.state);
|
|
nlm_nsm_state = smstat.state;
|
|
|
|
#ifdef NFSCLIENT
|
|
old_nfs_advlock = nfs_advlock_p;
|
|
nfs_advlock_p = nlm_advlock;
|
|
old_nfs_reclaim = nfs_reclaim_p;
|
|
nfs_reclaim_p = nlm_reclaim;
|
|
#endif
|
|
|
|
svc_run(pool);
|
|
error = 0;
|
|
|
|
#ifdef NFSCLIENT
|
|
nfs_advlock_p = old_nfs_advlock;
|
|
nfs_reclaim_p = old_nfs_reclaim;
|
|
#endif
|
|
|
|
out:
|
|
if (pool)
|
|
svcpool_destroy(pool);
|
|
|
|
/*
|
|
* We are finished communicating with the NSM.
|
|
*/
|
|
if (nlm_nsm) {
|
|
CLNT_RELEASE(nlm_nsm);
|
|
nlm_nsm = NULL;
|
|
}
|
|
|
|
/*
|
|
* Trash all the existing state so that if the server
|
|
* restarts, it gets a clean slate. This is complicated by the
|
|
* possibility that there may be other threads trying to make
|
|
* client locking requests.
|
|
*
|
|
* First we fake a client reboot notification which will
|
|
* cancel any pending async locks and purge remote lock state
|
|
* from the local lock manager. We release the reference from
|
|
* nlm_hosts to the host (which may remove it from the list
|
|
* and free it). After this phase, the only entries in the
|
|
* nlm_host list should be from other threads performing
|
|
* client lock requests. We arrange to defer closing the
|
|
* sockets until the last RPC client handle is released.
|
|
*/
|
|
v4_used = 0;
|
|
#ifdef INET6
|
|
v6_used = 0;
|
|
#endif
|
|
mtx_lock(&nlm_global_lock);
|
|
TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
|
|
wakeup(nw);
|
|
}
|
|
TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
|
|
mtx_unlock(&nlm_global_lock);
|
|
nlm_host_notify(host, 0);
|
|
nlm_host_release(host);
|
|
mtx_lock(&nlm_global_lock);
|
|
}
|
|
TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
|
|
mtx_lock(&host->nh_lock);
|
|
if (host->nh_srvrpc.nr_client
|
|
|| host->nh_clntrpc.nr_client) {
|
|
if (host->nh_addr.ss_family == AF_INET)
|
|
v4_used++;
|
|
#ifdef INET6
|
|
if (host->nh_addr.ss_family == AF_INET6)
|
|
v6_used++;
|
|
#endif
|
|
/*
|
|
* Note that the rpc over udp code copes
|
|
* correctly with the fact that a socket may
|
|
* be used by many rpc handles.
|
|
*/
|
|
if (host->nh_srvrpc.nr_client)
|
|
CLNT_CONTROL(host->nh_srvrpc.nr_client,
|
|
CLSET_FD_CLOSE, 0);
|
|
if (host->nh_clntrpc.nr_client)
|
|
CLNT_CONTROL(host->nh_clntrpc.nr_client,
|
|
CLSET_FD_CLOSE, 0);
|
|
}
|
|
mtx_unlock(&host->nh_lock);
|
|
}
|
|
mtx_unlock(&nlm_global_lock);
|
|
|
|
AUTH_DESTROY(nlm_auth);
|
|
|
|
if (!v4_used)
|
|
soclose(nlm_socket);
|
|
nlm_socket = NULL;
|
|
#ifdef INET6
|
|
if (!v6_used)
|
|
soclose(nlm_socket6);
|
|
nlm_socket6 = NULL;
|
|
#endif
|
|
|
|
return (error);
|
|
}
|
|
|
|
int
|
|
nlm_syscall(struct thread *td, struct nlm_syscall_args *uap)
|
|
{
|
|
int error;
|
|
|
|
#if __FreeBSD_version >= 700000
|
|
error = priv_check(td, PRIV_NFS_LOCKD);
|
|
#else
|
|
error = suser(td);
|
|
#endif
|
|
if (error)
|
|
return (error);
|
|
|
|
nlm_debug_level = uap->debug_level;
|
|
nlm_grace_threshold = time_uptime + uap->grace_period;
|
|
nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;
|
|
|
|
return nlm_server_main(uap->addr_count, uap->addrs);
|
|
}
|
|
|
|
/**********************************************************************/
|
|
|
|
/*
|
|
* NLM implementation details, called from the RPC stubs.
|
|
*/
|
|
|
|
|
|
void
|
|
nlm_sm_notify(struct nlm_sm_status *argp)
|
|
{
|
|
uint32_t sysid;
|
|
struct nlm_host *host;
|
|
|
|
if (nlm_debug_level >= 3)
|
|
printf("nlm_sm_notify(): mon_name = %s\n", argp->mon_name);
|
|
memcpy(&sysid, &argp->priv, sizeof(sysid));
|
|
host = nlm_find_host_by_sysid(sysid);
|
|
if (host) {
|
|
nlm_host_notify(host, argp->state);
|
|
nlm_host_release(host);
|
|
}
|
|
}
|
|
|
|
static void
|
|
nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p)
|
|
{
|
|
memcpy(fhp, p->n_bytes, sizeof(fhandle_t));
|
|
}
|
|
|
|
/*
 * VFS resources held on behalf of one request. Filled in by
 * nlm_get_vfs_state() and dropped by nlm_release_vfs_state().
 */
struct vfs_state {
	struct mount	*vs_mp;		/* mount returned by vfs_getvfs() */
	struct vnode	*vs_vp;		/* vnode returned by VFS_FHTOVP() */
	int		vs_vfslocked;	/* result of VFS_LOCK_GIANT() */
	int		vs_vnlocked;	/* TRUE until vs_vp is unlocked */
};
|
|
|
|
static int
|
|
nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp,
|
|
fhandle_t *fhp, struct vfs_state *vs)
|
|
{
|
|
int error, exflags;
|
|
struct ucred *cred = NULL, *credanon;
|
|
|
|
memset(vs, 0, sizeof(*vs));
|
|
|
|
vs->vs_mp = vfs_getvfs(&fhp->fh_fsid);
|
|
if (!vs->vs_mp) {
|
|
return (ESTALE);
|
|
}
|
|
vs->vs_vfslocked = VFS_LOCK_GIANT(vs->vs_mp);
|
|
|
|
error = VFS_CHECKEXP(vs->vs_mp, (struct sockaddr *)&host->nh_addr,
|
|
&exflags, &credanon, NULL, NULL);
|
|
if (error)
|
|
goto out;
|
|
|
|
if (exflags & MNT_EXRDONLY || (vs->vs_mp->mnt_flag & MNT_RDONLY)) {
|
|
error = EROFS;
|
|
goto out;
|
|
}
|
|
|
|
error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, &vs->vs_vp);
|
|
if (error)
|
|
goto out;
|
|
vs->vs_vnlocked = TRUE;
|
|
|
|
if (!svc_getcred(rqstp, &cred, NULL)) {
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
|
|
crfree(cred);
|
|
cred = crhold(credanon);
|
|
}
|
|
|
|
/*
|
|
* Check cred.
|
|
*/
|
|
error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread);
|
|
if (error)
|
|
goto out;
|
|
|
|
#if __FreeBSD_version < 800011
|
|
VOP_UNLOCK(vs->vs_vp, 0, curthread);
|
|
#else
|
|
VOP_UNLOCK(vs->vs_vp, 0);
|
|
#endif
|
|
vs->vs_vnlocked = FALSE;
|
|
|
|
out:
|
|
if (cred)
|
|
crfree(cred);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static void
|
|
nlm_release_vfs_state(struct vfs_state *vs)
|
|
{
|
|
|
|
if (vs->vs_vp) {
|
|
if (vs->vs_vnlocked)
|
|
vput(vs->vs_vp);
|
|
else
|
|
vrele(vs->vs_vp);
|
|
}
|
|
if (vs->vs_mp)
|
|
vfs_rel(vs->vs_mp);
|
|
VFS_UNLOCK_GIANT(vs->vs_vfslocked);
|
|
}
|
|
|
|
static nlm4_stats
|
|
nlm_convert_error(int error)
|
|
{
|
|
|
|
if (error == ESTALE)
|
|
return nlm4_stale_fh;
|
|
else if (error == EROFS)
|
|
return nlm4_rofs;
|
|
else
|
|
return nlm4_failed;
|
|
}
|
|
|
|
int
|
|
nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp,
|
|
CLIENT **rpcp)
|
|
{
|
|
fhandle_t fh;
|
|
struct vfs_state vs;
|
|
struct nlm_host *host, *bhost;
|
|
int error, sysid;
|
|
struct flock fl;
|
|
|
|
memset(result, 0, sizeof(*result));
|
|
memset(&vs, 0, sizeof(vs));
|
|
|
|
host = nlm_find_host_by_name(argp->alock.caller_name,
|
|
svc_getrpccaller(rqstp), rqstp->rq_vers);
|
|
if (!host) {
|
|
result->stat.stat = nlm4_denied_nolocks;
|
|
return (ENOMEM);
|
|
}
|
|
|
|
if (nlm_debug_level >= 3)
|
|
printf("nlm_do_test(): caller_name = %s (sysid = %d)\n",
|
|
host->nh_caller_name, host->nh_sysid);
|
|
|
|
nlm_free_finished_locks(host);
|
|
sysid = host->nh_sysid;
|
|
|
|
nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
|
|
nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
|
|
|
|
if (time_uptime < nlm_grace_threshold) {
|
|
result->stat.stat = nlm4_denied_grace_period;
|
|
goto out;
|
|
}
|
|
|
|
error = nlm_get_vfs_state(host, rqstp, &fh, &vs);
|
|
if (error) {
|
|
result->stat.stat = nlm_convert_error(error);
|
|
goto out;
|
|
}
|
|
|
|
fl.l_start = argp->alock.l_offset;
|
|
fl.l_len = argp->alock.l_len;
|
|
fl.l_pid = argp->alock.svid;
|
|
fl.l_sysid = sysid;
|
|
fl.l_whence = SEEK_SET;
|
|
if (argp->exclusive)
|
|
fl.l_type = F_WRLCK;
|
|
else
|
|
fl.l_type = F_RDLCK;
|
|
error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE);
|
|
if (error) {
|
|
result->stat.stat = nlm4_failed;
|
|
goto out;
|
|
}
|
|
|
|
if (fl.l_type == F_UNLCK) {
|
|
result->stat.stat = nlm4_granted;
|
|
} else {
|
|
result->stat.stat = nlm4_denied;
|
|
result->stat.nlm4_testrply_u.holder.exclusive =
|
|
(fl.l_type == F_WRLCK);
|
|
result->stat.nlm4_testrply_u.holder.svid = fl.l_pid;
|
|
bhost = nlm_find_host_by_sysid(fl.l_sysid);
|
|
if (bhost) {
|
|
/*
|
|
* We don't have any useful way of recording
|
|
* the value of oh used in the original lock
|
|
* request. Ideally, the test reply would have
|
|
* a space for the owning host's name allowing
|
|
* our caller's NLM to keep track.
|
|
*
|
|
* As far as I can see, Solaris uses an eight
|
|
* byte structure for oh which contains a four
|
|
* byte pid encoded in local byte order and
|
|
* the first four bytes of the host
|
|
* name. Linux uses a variable length string
|
|
* 'pid@hostname' in ascii but doesn't even
|
|
* return that in test replies.
|
|
*
|
|
* For the moment, return nothing in oh
|
|
* (already zero'ed above).
|
|
*/
|
|
nlm_host_release(bhost);
|
|
}
|
|
result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start;
|
|
result->stat.nlm4_testrply_u.holder.l_len = fl.l_len;
|
|
}
|
|
|
|
out:
|
|
nlm_release_vfs_state(&vs);
|
|
if (rpcp)
|
|
*rpcp = nlm_host_get_rpc(host, TRUE);
|
|
nlm_host_release(host);
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
|
|
bool_t monitor, CLIENT **rpcp)
|
|
{
|
|
fhandle_t fh;
|
|
struct vfs_state vs;
|
|
struct nlm_host *host;
|
|
int error, sysid;
|
|
struct flock fl;
|
|
|
|
memset(result, 0, sizeof(*result));
|
|
memset(&vs, 0, sizeof(vs));
|
|
|
|
host = nlm_find_host_by_name(argp->alock.caller_name,
|
|
svc_getrpccaller(rqstp), rqstp->rq_vers);
|
|
if (!host) {
|
|
result->stat.stat = nlm4_denied_nolocks;
|
|
return (ENOMEM);
|
|
}
|
|
|
|
if (nlm_debug_level >= 3)
|
|
printf("nlm_do_lock(): caller_name = %s (sysid = %d)\n",
|
|
host->nh_caller_name, host->nh_sysid);
|
|
|
|
if (monitor && host->nh_state && argp->state
|
|
&& host->nh_state != argp->state) {
|
|
/*
|
|
* The host rebooted without telling us. Trash its
|
|
* locks.
|
|
*/
|
|
nlm_host_notify(host, argp->state);
|
|
}
|
|
|
|
nlm_free_finished_locks(host);
|
|
sysid = host->nh_sysid;
|
|
|
|
nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
|
|
nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
|
|
|
|
if (time_uptime < nlm_grace_threshold && !argp->reclaim) {
|
|
result->stat.stat = nlm4_denied_grace_period;
|
|
goto out;
|
|
}
|
|
|
|
error = nlm_get_vfs_state(host, rqstp, &fh, &vs);
|
|
if (error) {
|
|
result->stat.stat = nlm_convert_error(error);
|
|
goto out;
|
|
}
|
|
|
|
fl.l_start = argp->alock.l_offset;
|
|
fl.l_len = argp->alock.l_len;
|
|
fl.l_pid = argp->alock.svid;
|
|
fl.l_sysid = sysid;
|
|
fl.l_whence = SEEK_SET;
|
|
if (argp->exclusive)
|
|
fl.l_type = F_WRLCK;
|
|
else
|
|
fl.l_type = F_RDLCK;
|
|
if (argp->block) {
|
|
struct nlm_async_lock *af;
|
|
CLIENT *client;
|
|
|
|
/*
|
|
* First, make sure we can contact the host's NLM.
|
|
*/
|
|
client = nlm_host_get_rpc(host, TRUE);
|
|
if (!client) {
|
|
result->stat.stat = nlm4_failed;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* First we need to check and see if there is an
|
|
* existing blocked lock that matches. This could be a
|
|
* badly behaved client or an RPC re-send. If we find
|
|
* one, just return nlm4_blocked.
|
|
*/
|
|
mtx_lock(&host->nh_lock);
|
|
TAILQ_FOREACH(af, &host->nh_pending, af_link) {
|
|
if (af->af_fl.l_start == fl.l_start
|
|
&& af->af_fl.l_len == fl.l_len
|
|
&& af->af_fl.l_pid == fl.l_pid
|
|
&& af->af_fl.l_type == fl.l_type) {
|
|
break;
|
|
}
|
|
}
|
|
mtx_unlock(&host->nh_lock);
|
|
if (af) {
|
|
CLNT_RELEASE(client);
|
|
result->stat.stat = nlm4_blocked;
|
|
goto out;
|
|
}
|
|
|
|
af = malloc(sizeof(struct nlm_async_lock), M_NLM,
|
|
M_WAITOK|M_ZERO);
|
|
TASK_INIT(&af->af_task, 0, nlm_lock_callback, af);
|
|
af->af_vp = vs.vs_vp;
|
|
af->af_fl = fl;
|
|
af->af_host = host;
|
|
af->af_rpc = client;
|
|
/*
|
|
* We use M_RPC here so that we can xdr_free the thing
|
|
* later.
|
|
*/
|
|
af->af_granted.exclusive = argp->exclusive;
|
|
af->af_granted.alock.caller_name =
|
|
strdup(argp->alock.caller_name, M_RPC);
|
|
nlm_copy_netobj(&af->af_granted.alock.fh,
|
|
&argp->alock.fh, M_RPC);
|
|
nlm_copy_netobj(&af->af_granted.alock.oh,
|
|
&argp->alock.oh, M_RPC);
|
|
af->af_granted.alock.svid = argp->alock.svid;
|
|
af->af_granted.alock.l_offset = argp->alock.l_offset;
|
|
af->af_granted.alock.l_len = argp->alock.l_len;
|
|
|
|
/*
|
|
* Put the entry on the pending list before calling
|
|
* VOP_ADVLOCKASYNC. We do this in case the lock
|
|
* request was blocked (returning EINPROGRESS) but
|
|
* then granted before we manage to run again. The
|
|
* client may receive the granted message before we
|
|
* send our blocked reply but thats their problem.
|
|
*/
|
|
mtx_lock(&host->nh_lock);
|
|
TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link);
|
|
mtx_unlock(&host->nh_lock);
|
|
|
|
error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE,
|
|
&af->af_task, &af->af_cookie);
|
|
|
|
/*
|
|
* If the lock completed synchronously, just free the
|
|
* tracking structure now.
|
|
*/
|
|
if (error != EINPROGRESS) {
|
|
CLNT_RELEASE(af->af_rpc);
|
|
mtx_lock(&host->nh_lock);
|
|
TAILQ_REMOVE(&host->nh_pending, af, af_link);
|
|
mtx_unlock(&host->nh_lock);
|
|
xdr_free((xdrproc_t) xdr_nlm4_testargs,
|
|
&af->af_granted);
|
|
free(af, M_NLM);
|
|
} else {
|
|
if (nlm_debug_level >= 2)
|
|
printf("NLM: pending async lock %p for %s "
|
|
"(sysid %d)\n",
|
|
af, host->nh_caller_name, sysid);
|
|
/*
|
|
* Don't vrele the vnode just yet - this must
|
|
* wait until either the async callback
|
|
* happens or the lock is cancelled.
|
|
*/
|
|
vs.vs_vp = NULL;
|
|
}
|
|
} else {
|
|
error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE);
|
|
}
|
|
|
|
if (error) {
|
|
if (error == EINPROGRESS) {
|
|
result->stat.stat = nlm4_blocked;
|
|
} else if (error == EDEADLK) {
|
|
result->stat.stat = nlm4_deadlck;
|
|
} else if (error == EAGAIN) {
|
|
result->stat.stat = nlm4_denied;
|
|
} else {
|
|
result->stat.stat = nlm4_failed;
|
|
}
|
|
} else {
|
|
if (monitor)
|
|
nlm_host_monitor(host, argp->state);
|
|
result->stat.stat = nlm4_granted;
|
|
}
|
|
|
|
out:
|
|
nlm_release_vfs_state(&vs);
|
|
if (rpcp)
|
|
*rpcp = nlm_host_get_rpc(host, TRUE);
|
|
nlm_host_release(host);
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp,
|
|
CLIENT **rpcp)
|
|
{
|
|
fhandle_t fh;
|
|
struct vfs_state vs;
|
|
struct nlm_host *host;
|
|
int error, sysid;
|
|
struct flock fl;
|
|
struct nlm_async_lock *af;
|
|
|
|
memset(result, 0, sizeof(*result));
|
|
memset(&vs, 0, sizeof(vs));
|
|
|
|
host = nlm_find_host_by_name(argp->alock.caller_name,
|
|
svc_getrpccaller(rqstp), rqstp->rq_vers);
|
|
if (!host) {
|
|
result->stat.stat = nlm4_denied_nolocks;
|
|
return (ENOMEM);
|
|
}
|
|
|
|
if (nlm_debug_level >= 3)
|
|
printf("nlm_do_cancel(): caller_name = %s (sysid = %d)\n",
|
|
host->nh_caller_name, host->nh_sysid);
|
|
|
|
nlm_free_finished_locks(host);
|
|
sysid = host->nh_sysid;
|
|
|
|
nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
|
|
nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
|
|
|
|
if (time_uptime < nlm_grace_threshold) {
|
|
result->stat.stat = nlm4_denied_grace_period;
|
|
goto out;
|
|
}
|
|
|
|
error = nlm_get_vfs_state(host, rqstp, &fh, &vs);
|
|
if (error) {
|
|
result->stat.stat = nlm_convert_error(error);
|
|
goto out;
|
|
}
|
|
|
|
fl.l_start = argp->alock.l_offset;
|
|
fl.l_len = argp->alock.l_len;
|
|
fl.l_pid = argp->alock.svid;
|
|
fl.l_sysid = sysid;
|
|
fl.l_whence = SEEK_SET;
|
|
if (argp->exclusive)
|
|
fl.l_type = F_WRLCK;
|
|
else
|
|
fl.l_type = F_RDLCK;
|
|
|
|
/*
|
|
* First we need to try and find the async lock request - if
|
|
* there isn't one, we give up and return nlm4_denied.
|
|
*/
|
|
mtx_lock(&host->nh_lock);
|
|
|
|
TAILQ_FOREACH(af, &host->nh_pending, af_link) {
|
|
if (af->af_fl.l_start == fl.l_start
|
|
&& af->af_fl.l_len == fl.l_len
|
|
&& af->af_fl.l_pid == fl.l_pid
|
|
&& af->af_fl.l_type == fl.l_type) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!af) {
|
|
mtx_unlock(&host->nh_lock);
|
|
result->stat.stat = nlm4_denied;
|
|
goto out;
|
|
}
|
|
|
|
error = nlm_cancel_async_lock(af);
|
|
|
|
if (error) {
|
|
result->stat.stat = nlm4_denied;
|
|
} else {
|
|
result->stat.stat = nlm4_granted;
|
|
}
|
|
|
|
mtx_unlock(&host->nh_lock);
|
|
|
|
out:
|
|
nlm_release_vfs_state(&vs);
|
|
if (rpcp)
|
|
*rpcp = nlm_host_get_rpc(host, TRUE);
|
|
nlm_host_release(host);
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp,
|
|
CLIENT **rpcp)
|
|
{
|
|
fhandle_t fh;
|
|
struct vfs_state vs;
|
|
struct nlm_host *host;
|
|
int error, sysid;
|
|
struct flock fl;
|
|
|
|
memset(result, 0, sizeof(*result));
|
|
memset(&vs, 0, sizeof(vs));
|
|
|
|
host = nlm_find_host_by_name(argp->alock.caller_name,
|
|
svc_getrpccaller(rqstp), rqstp->rq_vers);
|
|
if (!host) {
|
|
result->stat.stat = nlm4_denied_nolocks;
|
|
return (ENOMEM);
|
|
}
|
|
|
|
if (nlm_debug_level >= 3)
|
|
printf("nlm_do_unlock(): caller_name = %s (sysid = %d)\n",
|
|
host->nh_caller_name, host->nh_sysid);
|
|
|
|
nlm_free_finished_locks(host);
|
|
sysid = host->nh_sysid;
|
|
|
|
nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
|
|
nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
|
|
|
|
if (time_uptime < nlm_grace_threshold) {
|
|
result->stat.stat = nlm4_denied_grace_period;
|
|
goto out;
|
|
}
|
|
|
|
error = nlm_get_vfs_state(host, rqstp, &fh, &vs);
|
|
if (error) {
|
|
result->stat.stat = nlm_convert_error(error);
|
|
goto out;
|
|
}
|
|
|
|
fl.l_start = argp->alock.l_offset;
|
|
fl.l_len = argp->alock.l_len;
|
|
fl.l_pid = argp->alock.svid;
|
|
fl.l_sysid = sysid;
|
|
fl.l_whence = SEEK_SET;
|
|
fl.l_type = F_UNLCK;
|
|
error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE);
|
|
|
|
/*
|
|
* Ignore the error - there is no result code for failure,
|
|
* only for grace period.
|
|
*/
|
|
result->stat.stat = nlm4_granted;
|
|
|
|
out:
|
|
nlm_release_vfs_state(&vs);
|
|
if (rpcp)
|
|
*rpcp = nlm_host_get_rpc(host, TRUE);
|
|
nlm_host_release(host);
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp,
|
|
|
|
CLIENT **rpcp)
|
|
{
|
|
struct nlm_host *host;
|
|
struct nlm_waiting_lock *nw;
|
|
|
|
memset(result, 0, sizeof(*result));
|
|
|
|
host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers);
|
|
if (!host) {
|
|
result->stat.stat = nlm4_denied_nolocks;
|
|
return (ENOMEM);
|
|
}
|
|
|
|
nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
|
|
result->stat.stat = nlm4_denied;
|
|
|
|
mtx_lock(&nlm_global_lock);
|
|
TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
|
|
if (!nw->nw_waiting)
|
|
continue;
|
|
if (argp->alock.svid == nw->nw_lock.svid
|
|
&& argp->alock.l_offset == nw->nw_lock.l_offset
|
|
&& argp->alock.l_len == nw->nw_lock.l_len
|
|
&& argp->alock.fh.n_len == nw->nw_lock.fh.n_len
|
|
&& !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes,
|
|
nw->nw_lock.fh.n_len)) {
|
|
nw->nw_waiting = FALSE;
|
|
wakeup(nw);
|
|
result->stat.stat = nlm4_granted;
|
|
break;
|
|
}
|
|
}
|
|
mtx_unlock(&nlm_global_lock);
|
|
if (rpcp)
|
|
*rpcp = nlm_host_get_rpc(host, TRUE);
|
|
nlm_host_release(host);
|
|
return (0);
|
|
}
|
|
|
|
void
|
|
nlm_do_free_all(nlm4_notify *argp)
|
|
{
|
|
struct nlm_host *host, *thost;
|
|
|
|
TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) {
|
|
if (!strcmp(host->nh_caller_name, argp->name))
|
|
nlm_host_notify(host, argp->state);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Kernel module glue
|
|
*/
|
|
static int
|
|
nfslockd_modevent(module_t mod, int type, void *data)
|
|
{
|
|
|
|
return (0);
|
|
}
|
|
static moduledata_t nfslockd_mod = {
|
|
"nfslockd",
|
|
nfslockd_modevent,
|
|
NULL,
|
|
};
|
|
DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY);
|
|
|
|
/* So that loader and kldload(2) can find us, wherever we are.. */
|
|
MODULE_DEPEND(nfslockd, krpc, 1, 1, 1);
|
|
MODULE_DEPEND(nfslockd, nfs, 1, 1, 1);
|
|
MODULE_VERSION(nfslockd, 1);
|