freebsd-skq/sys/nfsserver/nfs_srvsock.c
Jamie Gritton 0304c73163 Add hierarchical jails. A jail may further virtualize its environment
by creating a child jail, which is visible to that jail and to any
parent jails.  Child jails may be restricted more than their parents,
but never less.  Jail names reflect this hierarchy, being MIB-style
dot-separated strings.

Every thread now points to a jail, the default being prison0, which
contains information about the physical system.  Prison0's root
directory is the same as rootvnode; its hostname is the same as the
global hostname, and its securelevel replaces the global securelevel.
Note that the variable "securelevel" has actually gone away, which
should not cause any problems for code that properly uses
securelevel_gt() and securelevel_ge().

Some jail-related permissions that were kept in global variables and
set via sysctls are now per-jail settings.  The sysctls still exist for
backward compatibility, used only by the now-deprecated jail(2) system
call.

Approved by:	bz (mentor)
2009-05-27 14:11:23 +00:00

820 lines
20 KiB
C

/*-
* Copyright (c) 1989, 1991, 1993, 1995
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Rick Macklem at The University of Guelph.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*
* Socket operations for use by nfs
*/
#include "opt_mac.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/refcount.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/vnode.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfsserver/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfsserver/nfsm_subs.h>
#include <security/mac/mac_framework.h>
#ifdef NFS_LEGACYRPC
#define TRUE 1
#define FALSE 0
static int nfs_realign_test;
static int nfs_realign_count;
SYSCTL_DECL(_vfs_nfsrv);
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, "");
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, "");
/*
* There is a congestion window for outstanding rpcs maintained per mount
* point. The cwnd size is adjusted in roughly the way that:
* Van Jacobson, Congestion avoidance and Control, In "Proceedings of
* SIGCOMM '88". ACM, August 1988.
* describes for TCP. The cwnd size is chopped in half on a retransmit timeout
* and incremented by 1/cwnd when each rpc reply is received and a full cwnd
* of rpcs is in progress.
* (The sent count and cwnd are scaled for integer arith.)
* Variants of "slow start" were tried and were found to be too much of a
* performance hit (ave. rtt 3 times larger),
* I suspect due to the large rtt that nfs rpcs have.
*/
#define NFS_CWNDSCALE 256
#define NFS_MAXCWND (NFS_CWNDSCALE * 32)
struct callout nfsrv_callout;
static void nfs_realign(struct mbuf **pm, int hsiz); /* XXX SHARED */
static int nfsrv_getstream(struct nfssvc_sock *, int);
int32_t (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
struct nfssvc_sock *slp,
struct mbuf **mreqp) = {
nfsrv_null,
nfsrv_getattr,
nfsrv_setattr,
nfsrv_lookup,
nfsrv3_access,
nfsrv_readlink,
nfsrv_read,
nfsrv_write,
nfsrv_create,
nfsrv_mkdir,
nfsrv_symlink,
nfsrv_mknod,
nfsrv_remove,
nfsrv_rmdir,
nfsrv_rename,
nfsrv_link,
nfsrv_readdir,
nfsrv_readdirplus,
nfsrv_statfs,
nfsrv_fsinfo,
nfsrv_pathconf,
nfsrv_commit,
nfsrv_noop
};
/*
* Generate the rpc reply header
* siz arg. is used to decide if adding a cluster is worthwhile
*/
struct mbuf *
nfs_rephead(int siz, struct nfsrv_descript *nd, int err,
struct mbuf **mbp, caddr_t *bposp)
{
u_int32_t *tl;
struct mbuf *mreq;
caddr_t bpos;
struct mbuf *mb;
nd->nd_repstat = err;
if (err && (nd->nd_flag & ND_NFSV3) == 0) /* XXX recheck */
siz = 0;
MGETHDR(mreq, M_WAIT, MT_DATA);
mb = mreq;
/*
* If this is a big reply, use a cluster else
* try and leave leading space for the lower level headers.
*/
mreq->m_len = 6 * NFSX_UNSIGNED;
siz += RPC_REPLYSIZ;
if ((max_hdr + siz) >= MINCLSIZE) {
MCLGET(mreq, M_WAIT);
} else
mreq->m_data += min(max_hdr, M_TRAILINGSPACE(mreq));
tl = mtod(mreq, u_int32_t *);
bpos = ((caddr_t)tl) + mreq->m_len;
*tl++ = txdr_unsigned(nd->nd_retxid);
*tl++ = nfsrv_rpc_reply;
if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
*tl++ = nfsrv_rpc_msgdenied;
if (err & NFSERR_AUTHERR) {
*tl++ = nfsrv_rpc_autherr;
*tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
mreq->m_len -= NFSX_UNSIGNED;
bpos -= NFSX_UNSIGNED;
} else {
*tl++ = nfsrv_rpc_mismatch;
*tl++ = txdr_unsigned(RPC_VER2);
*tl = txdr_unsigned(RPC_VER2);
}
} else {
*tl++ = nfsrv_rpc_msgaccepted;
/*
* Send a RPCAUTH_NULL verifier - no Kerberos.
*/
*tl++ = 0;
*tl++ = 0;
switch (err) {
case EPROGUNAVAIL:
*tl = txdr_unsigned(RPC_PROGUNAVAIL);
break;
case EPROGMISMATCH:
*tl = txdr_unsigned(RPC_PROGMISMATCH);
tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
*tl++ = txdr_unsigned(2);
*tl = txdr_unsigned(3);
break;
case EPROCUNAVAIL:
*tl = txdr_unsigned(RPC_PROCUNAVAIL);
break;
case EBADRPC:
*tl = txdr_unsigned(RPC_GARBAGE);
break;
default:
*tl = 0;
if (err != NFSERR_RETVOID) {
tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
if (err)
*tl = txdr_unsigned(nfsrv_errmap(nd, err));
else
*tl = 0;
}
break;
}
}
*mbp = mb;
*bposp = bpos;
if (err != 0 && err != NFSERR_RETVOID)
nfsrvstats.srvrpc_errs++;
return mreq;
}
/*
* nfs_realign:
*
* Check for badly aligned mbuf data and realign by copying the unaligned
* portion of the data into a new mbuf chain and freeing the portions
* of the old chain that were replaced.
*
* We cannot simply realign the data within the existing mbuf chain
* because the underlying buffers may contain other rpc commands and
* we cannot afford to overwrite them.
*
* We would prefer to avoid this situation entirely. The situation does
* not occur with NFS/UDP and is supposed to only occassionally occur
* with TCP. Use vfs.nfs.realign_count and realign_test to check this.
*/
static void
nfs_realign(struct mbuf **pm, int hsiz) /* XXX COMMON */
{
struct mbuf *m;
struct mbuf *n = NULL;
int off = 0;
++nfs_realign_test;
while ((m = *pm) != NULL) {
if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
MGET(n, M_WAIT, MT_DATA);
if (m->m_len >= MINCLSIZE) {
MCLGET(n, M_WAIT);
}
n->m_len = 0;
break;
}
pm = &m->m_next;
}
/*
* If n is non-NULL, loop on m copying data, then replace the
* portion of the chain that had to be realigned.
*/
if (n != NULL) {
++nfs_realign_count;
while (m) {
m_copyback(n, off, m->m_len, mtod(m, caddr_t));
off += m->m_len;
m = m->m_next;
}
m_freem(*pm);
*pm = n;
}
}
/*
* Parse an RPC request
* - verify it
* - fill in the cred struct.
*/
int
nfs_getreq(struct nfsrv_descript *nd, struct nfsd *nfsd, int has_header)
{
int len, i;
u_int32_t *tl;
caddr_t dpos;
u_int32_t nfsvers, auth_type;
int error = 0;
struct mbuf *mrep, *md;
NFSD_LOCK_ASSERT();
mrep = nd->nd_mrep;
md = nd->nd_md;
dpos = nd->nd_dpos;
if (has_header) {
tl = nfsm_dissect_nonblock(u_int32_t *, 10 * NFSX_UNSIGNED);
nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
if (*tl++ != nfsrv_rpc_call) {
m_freem(mrep);
return (EBADRPC);
}
} else
tl = nfsm_dissect_nonblock(u_int32_t *, 8 * NFSX_UNSIGNED);
nd->nd_repstat = 0;
nd->nd_flag = 0;
if (*tl++ != nfsrv_rpc_vers) {
nd->nd_repstat = ERPCMISMATCH;
nd->nd_procnum = NFSPROC_NOOP;
return (0);
}
if (*tl != nfsrv_nfs_prog) {
nd->nd_repstat = EPROGUNAVAIL;
nd->nd_procnum = NFSPROC_NOOP;
return (0);
}
tl++;
nfsvers = fxdr_unsigned(u_int32_t, *tl++);
if (nfsvers < NFS_VER2 || nfsvers > NFS_VER3) {
nd->nd_repstat = EPROGMISMATCH;
nd->nd_procnum = NFSPROC_NOOP;
return (0);
}
nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
if (nd->nd_procnum == NFSPROC_NULL)
return (0);
if (nfsvers == NFS_VER3) {
nd->nd_flag = ND_NFSV3;
if (nd->nd_procnum >= NFS_NPROCS) {
nd->nd_repstat = EPROCUNAVAIL;
nd->nd_procnum = NFSPROC_NOOP;
return (0);
}
} else {
if (nd->nd_procnum > NFSV2PROC_STATFS) {
nd->nd_repstat = EPROCUNAVAIL;
nd->nd_procnum = NFSPROC_NOOP;
return (0);
}
/* Map the v2 procedure numbers into v3 ones */
nd->nd_procnum = nfsrv_nfsv3_procid[nd->nd_procnum];
}
auth_type = *tl++;
len = fxdr_unsigned(int, *tl++);
if (len < 0 || len > RPCAUTH_MAXSIZ) {
m_freem(mrep);
return (EBADRPC);
}
/*
* Handle auth_unix;
*/
if (auth_type == nfsrv_rpc_auth_unix) {
len = fxdr_unsigned(int, *++tl);
if (len < 0 || len > NFS_MAXNAMLEN) {
m_freem(mrep);
return (EBADRPC);
}
nfsm_adv(nfsm_rndup(len));
tl = nfsm_dissect_nonblock(u_int32_t *, 3 * NFSX_UNSIGNED);
nd->nd_cr->cr_uid = nd->nd_cr->cr_ruid =
nd->nd_cr->cr_svuid = fxdr_unsigned(uid_t, *tl++);
nd->nd_cr->cr_groups[0] = nd->nd_cr->cr_rgid =
nd->nd_cr->cr_svgid = fxdr_unsigned(gid_t, *tl++);
#ifdef MAC
mac_cred_associate_nfsd(nd->nd_cr);
#endif
len = fxdr_unsigned(int, *tl);
if (len < 0 || len > RPCAUTH_UNIXGIDS) {
m_freem(mrep);
return (EBADRPC);
}
tl = nfsm_dissect_nonblock(u_int32_t *, (len + 2) * NFSX_UNSIGNED);
for (i = 1; i <= len; i++)
if (i < NGROUPS)
nd->nd_cr->cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
else
tl++;
nd->nd_cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
if (nd->nd_cr->cr_ngroups > 1)
nfsrvw_sort(nd->nd_cr->cr_groups, nd->nd_cr->cr_ngroups);
len = fxdr_unsigned(int, *++tl);
if (len < 0 || len > RPCAUTH_MAXSIZ) {
m_freem(mrep);
return (EBADRPC);
}
if (len > 0)
nfsm_adv(nfsm_rndup(len));
nd->nd_credflavor = RPCAUTH_UNIX;
} else {
nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
nd->nd_procnum = NFSPROC_NOOP;
return (0);
}
nd->nd_md = md;
nd->nd_dpos = dpos;
return (0);
nfsmout:
return (error);
}
/*
* Socket upcall routine for the nfsd sockets.
* The caddr_t arg is a pointer to the "struct nfssvc_sock".
* Essentially do as much as possible non-blocking, else punt and it will
* be called with M_WAIT from an nfsd.
*/
void
nfsrv_rcv(struct socket *so, void *arg, int waitflag)
{
struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
struct mbuf *m;
struct mbuf *mp;
struct sockaddr *nam;
struct uio auio;
int flags, error;
NFSD_UNLOCK_ASSERT();
/* XXXRW: Unlocked read. */
if ((slp->ns_flag & SLP_VALID) == 0)
return;
/*
* We can't do this in the context of a socket callback
* because we're called with locks held.
* XXX: SMP
*/
if (waitflag == M_DONTWAIT) {
NFSD_LOCK();
slp->ns_flag |= SLP_NEEDQ;
goto dorecs;
}
NFSD_LOCK();
auio.uio_td = NULL;
if (so->so_type == SOCK_STREAM) {
/*
* If there are already records on the queue, defer soreceive()
* to an nfsd so that there is feedback to the TCP layer that
* the nfs servers are heavily loaded.
*/
if (STAILQ_FIRST(&slp->ns_rec) != NULL &&
waitflag == M_DONTWAIT) {
slp->ns_flag |= SLP_NEEDQ;
goto dorecs;
}
/*
* Do soreceive().
*/
auio.uio_resid = 1000000000;
flags = MSG_DONTWAIT;
NFSD_UNLOCK();
error = soreceive(so, &nam, &auio, &mp, NULL, &flags);
NFSD_LOCK();
if (error || mp == NULL) {
if (error == EWOULDBLOCK)
slp->ns_flag |= SLP_NEEDQ;
else
slp->ns_flag |= SLP_DISCONN;
goto dorecs;
}
m = mp;
if (slp->ns_rawend) {
slp->ns_rawend->m_next = m;
slp->ns_cc += 1000000000 - auio.uio_resid;
} else {
slp->ns_raw = m;
slp->ns_cc = 1000000000 - auio.uio_resid;
}
while (m->m_next)
m = m->m_next;
slp->ns_rawend = m;
/*
* Now try and parse record(s) out of the raw stream data.
*/
error = nfsrv_getstream(slp, waitflag);
if (error) {
if (error == EPERM)
slp->ns_flag |= SLP_DISCONN;
else
slp->ns_flag |= SLP_NEEDQ;
}
} else {
do {
auio.uio_resid = 1000000000;
flags = MSG_DONTWAIT;
NFSD_UNLOCK();
error = soreceive(so, &nam, &auio, &mp, NULL, &flags);
if (mp) {
struct nfsrv_rec *rec;
rec = malloc(sizeof(struct nfsrv_rec),
M_NFSRVDESC,
waitflag == M_DONTWAIT ? M_NOWAIT : M_WAITOK);
if (!rec) {
if (nam)
free(nam, M_SONAME);
m_freem(mp);
NFSD_LOCK();
continue;
}
nfs_realign(&mp, 10 * NFSX_UNSIGNED);
NFSD_LOCK();
rec->nr_address = nam;
rec->nr_packet = mp;
STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
} else
NFSD_LOCK();
if (error) {
if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
&& error != EWOULDBLOCK) {
slp->ns_flag |= SLP_DISCONN;
goto dorecs;
}
}
} while (mp);
}
/*
* Now try and process the request records, non-blocking.
*/
dorecs:
if (waitflag == M_DONTWAIT &&
(STAILQ_FIRST(&slp->ns_rec) != NULL ||
(slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
nfsrv_wakenfsd(slp);
NFSD_UNLOCK();
}
/*
* Try and extract an RPC request from the mbuf data list received on a
* stream socket. The "waitflag" argument indicates whether or not it
* can sleep.
*/
static int
nfsrv_getstream(struct nfssvc_sock *slp, int waitflag)
{
struct mbuf *m, **mpp;
char *cp1, *cp2;
int len;
struct mbuf *om, *m2, *recm;
u_int32_t recmark;
NFSD_LOCK_ASSERT();
if (slp->ns_flag & SLP_GETSTREAM)
panic("nfs getstream");
slp->ns_flag |= SLP_GETSTREAM;
for (;;) {
if (slp->ns_reclen == 0) {
if (slp->ns_cc < NFSX_UNSIGNED) {
slp->ns_flag &= ~SLP_GETSTREAM;
return (0);
}
m = slp->ns_raw;
if (m->m_len >= NFSX_UNSIGNED) {
bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
m->m_data += NFSX_UNSIGNED;
m->m_len -= NFSX_UNSIGNED;
} else {
cp1 = (caddr_t)&recmark;
cp2 = mtod(m, caddr_t);
while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
while (m->m_len == 0) {
m = m->m_next;
cp2 = mtod(m, caddr_t);
}
*cp1++ = *cp2++;
m->m_data++;
m->m_len--;
}
}
slp->ns_cc -= NFSX_UNSIGNED;
recmark = ntohl(recmark);
slp->ns_reclen = recmark & ~0x80000000;
if (recmark & 0x80000000)
slp->ns_flag |= SLP_LASTFRAG;
else
slp->ns_flag &= ~SLP_LASTFRAG;
if (slp->ns_reclen > NFS_MAXPACKET || slp->ns_reclen <= 0) {
slp->ns_flag &= ~SLP_GETSTREAM;
return (EPERM);
}
}
/*
* Now get the record part.
*
* Note that slp->ns_reclen may be 0. Linux sometimes
* generates 0-length RPCs.
*/
recm = NULL;
if (slp->ns_cc == slp->ns_reclen) {
recm = slp->ns_raw;
slp->ns_raw = slp->ns_rawend = NULL;
slp->ns_cc = slp->ns_reclen = 0;
} else if (slp->ns_cc > slp->ns_reclen) {
len = 0;
m = slp->ns_raw;
om = NULL;
while (len < slp->ns_reclen) {
if ((len + m->m_len) > slp->ns_reclen) {
NFSD_UNLOCK();
m2 = m_copym(m, 0, slp->ns_reclen - len,
waitflag);
NFSD_LOCK();
if (m2) {
if (om) {
om->m_next = m2;
recm = slp->ns_raw;
} else
recm = m2;
m->m_data += slp->ns_reclen - len;
m->m_len -= slp->ns_reclen - len;
len = slp->ns_reclen;
} else {
slp->ns_flag &= ~SLP_GETSTREAM;
return (EWOULDBLOCK);
}
} else if ((len + m->m_len) == slp->ns_reclen) {
om = m;
len += m->m_len;
m = m->m_next;
recm = slp->ns_raw;
om->m_next = NULL;
} else {
om = m;
len += m->m_len;
m = m->m_next;
}
}
slp->ns_raw = m;
slp->ns_cc -= len;
slp->ns_reclen = 0;
} else {
slp->ns_flag &= ~SLP_GETSTREAM;
return (0);
}
/*
* Accumulate the fragments into a record.
*/
mpp = &slp->ns_frag;
while (*mpp)
mpp = &((*mpp)->m_next);
*mpp = recm;
if (slp->ns_flag & SLP_LASTFRAG) {
struct nfsrv_rec *rec;
NFSD_UNLOCK();
rec = malloc(sizeof(struct nfsrv_rec), M_NFSRVDESC,
waitflag == M_DONTWAIT ? M_NOWAIT : M_WAITOK);
if (rec) {
nfs_realign(&slp->ns_frag, 10 * NFSX_UNSIGNED);
rec->nr_address = NULL;
rec->nr_packet = slp->ns_frag;
NFSD_LOCK();
STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
} else {
NFSD_LOCK();
}
if (!rec) {
m_freem(slp->ns_frag);
}
slp->ns_frag = NULL;
}
}
}
/*
* Parse an RPC header.
*/
int
nfsrv_dorec(struct nfssvc_sock *slp, struct nfsd *nfsd,
struct nfsrv_descript **ndp)
{
struct nfsrv_rec *rec;
struct mbuf *m;
struct sockaddr *nam;
struct nfsrv_descript *nd;
int error;
NFSD_LOCK_ASSERT();
*ndp = NULL;
if ((slp->ns_flag & SLP_VALID) == 0 ||
STAILQ_FIRST(&slp->ns_rec) == NULL)
return (ENOBUFS);
rec = STAILQ_FIRST(&slp->ns_rec);
KASSERT(rec->nr_packet != NULL, ("nfsrv_dorec: missing mbuf"));
STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link);
nam = rec->nr_address;
m = rec->nr_packet;
free(rec, M_NFSRVDESC);
NFSD_UNLOCK();
nd = malloc(sizeof (struct nfsrv_descript),
M_NFSRVDESC, M_WAITOK);
nd->nd_cr = crget();
prison_hold(&prison0);
nd->nd_cr->cr_prison = &prison0;
NFSD_LOCK();
nd->nd_md = nd->nd_mrep = m;
nd->nd_nam2 = nam;
nd->nd_dpos = mtod(m, caddr_t);
error = nfs_getreq(nd, nfsd, TRUE);
if (error) {
if (nam) {
free(nam, M_SONAME);
}
if (nd->nd_cr != NULL)
crfree(nd->nd_cr);
free((caddr_t)nd, M_NFSRVDESC);
return (error);
}
*ndp = nd;
nfsd->nfsd_nd = nd;
return (0);
}
/*
* Search for a sleeping nfsd and wake it up.
* SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
* running nfsds will go look for the work in the nfssvc_sock list.
*/
void
nfsrv_wakenfsd(struct nfssvc_sock *slp)
{
struct nfsd *nd;
NFSD_LOCK_ASSERT();
if ((slp->ns_flag & SLP_VALID) == 0)
return;
TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) {
if (nd->nfsd_flag & NFSD_WAITING) {
nd->nfsd_flag &= ~NFSD_WAITING;
if (nd->nfsd_slp)
panic("nfsd wakeup");
slp->ns_sref++;
nd->nfsd_slp = slp;
wakeup(nd);
return;
}
}
slp->ns_flag |= SLP_DOREC;
nfsd_head_flag |= NFSD_CHECKSLP;
}
/*
* This is the nfs send routine.
* For the server side:
* - return EINTR or ERESTART if interrupted by a signal
* - return EPIPE if a connection is lost for connection based sockets (TCP...)
* - do any cleanup required by recoverable socket errors (?)
*/
int
nfsrv_send(struct socket *so, struct sockaddr *nam, struct mbuf *top)
{
struct sockaddr *sendnam;
int error, soflags, flags;
NFSD_UNLOCK_ASSERT();
soflags = so->so_proto->pr_flags;
if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
sendnam = NULL;
else
sendnam = nam;
if (so->so_type == SOCK_SEQPACKET)
flags = MSG_EOR;
else
flags = 0;
error = sosend(so, sendnam, 0, top, 0, flags, curthread/*XXX*/);
if (error == ENOBUFS && so->so_type == SOCK_DGRAM)
error = 0;
if (error) {
log(LOG_INFO, "nfsd send error %d\n", error);
/*
* Handle any recoverable (soft) socket errors here. (?)
*/
if (error != EINTR && error != ERESTART &&
error != EWOULDBLOCK && error != EPIPE)
error = 0;
}
return (error);
}
/*
* NFS server timer routine.
*/
void
nfsrv_timer(void *arg)
{
struct nfssvc_sock *slp;
u_quad_t cur_usec;
NFSD_LOCK();
/*
* Scan the write gathering queues for writes that need to be
* completed now.
*/
cur_usec = nfs_curusec();
TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
if (LIST_FIRST(&slp->ns_tq) &&
LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
nfsrv_wakenfsd(slp);
}
NFSD_UNLOCK();
callout_reset(&nfsrv_callout, nfsrv_ticks, nfsrv_timer, NULL);
}
#endif /* NFS_LEGACYRPC */