a9148abd9d
and server. This replaces the RPC implementation of the NFS client and server with the newer RPC implementation originally developed (actually ported from the userland sunrpc code) to support the NFS Lock Manager. I have tested this code extensively and I believe it is stable and that performance is at least equal to the legacy RPC implementation. The NFS code currently contains support for both the new RPC implementation and the older legacy implementation inherited from the original NFS codebase. The default is to use the new implementation - add the NFS_LEGACYRPC option to fall back to the old code. When I merge this support back to RELENG_7, I will probably change this so that users have to 'opt in' to get the new code. To use RPCSEC_GSS on either client or server, you must build a kernel which includes the KGSSAPI option and the crypto device. On the userland side, you must build at least a new libc, mountd, mount_nfs and gssd. You must install new versions of /etc/rc.d/gssd and /etc/rc.d/nfsd and add 'gssd_enable=YES' to /etc/rc.conf. As long as gssd is running, you should be able to mount an NFS filesystem from a server that requires RPCSEC_GSS authentication. The mount itself can happen without any kerberos credentials but all access to the filesystem will be denied unless the accessing user has a valid ticket file in the standard place (/tmp/krb5cc_<uid>). There is currently no support for situations where the ticket file is in a different place, such as when the user logged in via SSH and has delegated credentials from that login. This restriction is also present in Solaris and Linux. In theory, we could improve this in future, possibly using Brooks Davis' implementation of variant symlinks. Supporting RPCSEC_GSS on a server is nearly as simple. You must create service creds for the server in the form 'nfs/<fqdn>@<REALM>' and install them in /etc/krb5.keytab. The standard heimdal utility ktutil makes this fairly easy. After the service creds have been created, you can add a '-sec=krb5' option to /etc/exports and restart both mountd and nfsd. The only other difference an administrator should notice is that nfsd doesn't fork to create service threads any more. In normal operation, there will be two nfsd processes, one in userland waiting for TCP connections and one in the kernel handling requests. The latter process will create as many kthreads as required - these should be visible via 'top -H'. The code has some support for varying the number of service threads according to load but initially at least, nfsd uses a fixed number of threads according to the value supplied to its '-n' option. Sponsored by: Isilon Systems MFC after: 1 month
369 lines
12 KiB
C
369 lines
12 KiB
C
/*-
|
|
* Copyright (c) 1989, 1993, 1995
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to Berkeley by
|
|
* Rick Macklem at The University of Guelph.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* @(#)nfs.h 8.4 (Berkeley) 5/1/95
|
|
* $FreeBSD$
|
|
*/
|
|
|
|
#ifndef _NFSCLIENT_NFS_H_
|
|
#define _NFSCLIENT_NFS_H_
|
|
|
|
#ifdef _KERNEL
|
|
#include "opt_nfs.h"
|
|
#endif
|
|
|
|
#include <nfsclient/nfsargs.h>
|
|
|
|
/*
|
|
* Tunable constants for nfs
|
|
*/
|
|
|
|
#define NFS_TICKINTVL 10 /* Desired time for a tick (msec) */
|
|
#define NFS_HZ (hz / nfs_ticks) /* Ticks/sec */
|
|
#define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */
|
|
#define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */
|
|
#define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */
|
|
#define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/
|
|
#define NFS_MAXREXMIT 100 /* Stop counting after this many */
|
|
#define NFS_RETRANS 10 /* Num of retrans for UDP soft mounts */
|
|
#define NFS_RETRANS_TCP 2 /* Num of retrans for TCP soft mounts */
|
|
#define NFS_MAXGRPS 16 /* Max. size of groups list */
|
|
#ifndef NFS_MINATTRTIMO
|
|
#define NFS_MINATTRTIMO 3 /* VREG attrib cache timeout in sec */
|
|
#endif
|
|
#ifndef NFS_MAXATTRTIMO
|
|
#define NFS_MAXATTRTIMO 60
|
|
#endif
|
|
#ifndef NFS_MINDIRATTRTIMO
|
|
#define NFS_MINDIRATTRTIMO 30 /* VDIR attrib cache timeout in sec */
|
|
#endif
|
|
#ifndef NFS_MAXDIRATTRTIMO
|
|
#define NFS_MAXDIRATTRTIMO 60
|
|
#endif
|
|
#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */
|
|
#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */
|
|
#define NFS_READDIRSIZE 8192 /* Def. readdir size */
|
|
#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */
|
|
#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */
|
|
#define NFS_MAXASYNCDAEMON 64 /* Max. number async_daemons runnable */
|
|
#define NFS_DIRBLKSIZ 4096 /* Must be a multiple of DIRBLKSIZ */
|
|
#ifdef _KERNEL
|
|
#define DIRBLKSIZ 512 /* XXX we used to use ufs's DIRBLKSIZ */
|
|
#endif
|
|
#define NFS_MAXDEADTHRESH 9 /* How long till we say 'server not responding' */
|
|
|
|
/*
|
|
* Oddballs
|
|
*/
|
|
#define NFS_CMPFH(n, f, s) \
|
|
((n)->n_fhsize == (s) && !bcmp((caddr_t)(n)->n_fhp, (caddr_t)(f), (s)))
|
|
#define NFS_ISV3(v) (VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV3)
|
|
#define NFS_ISV4(v) (VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV4)
|
|
|
|
#define NFSSTA_HASWRITEVERF 0x00040000 /* Has write verifier for V3 */
|
|
#define NFSSTA_GOTFSINFO 0x00100000 /* Got the V3 fsinfo */
|
|
#define NFSSTA_SNDLOCK 0x01000000 /* Send socket lock */
|
|
#define NFSSTA_WANTSND 0x02000000 /* Want above */
|
|
#define NFSSTA_TIMEO 0x10000000 /* Experiencing a timeout */
|
|
#define NFSSTA_LOCKTIMEO 0x20000000 /* Experiencing a lockd timeout */
|
|
|
|
|
|
/*
|
|
* XXX to allow amd to include nfs.h without nfsproto.h
|
|
*/
|
|
#ifdef NFS_NPROCS
|
|
#include <nfsclient/nfsstats.h>
|
|
#endif
|
|
|
|
/*
|
|
* vfs.nfs sysctl(3) identifiers
|
|
*/
|
|
#define NFS_NFSSTATS 1 /* struct: struct nfsstats */
|
|
|
|
/*
|
|
* File context information for nfsv4. Currently, there is only one
|
|
* lockowner for the whole machine "0."
|
|
*/
|
|
struct nfs4_fctx {
|
|
TAILQ_ENTRY(nfs4_fstate) next;
|
|
uint32_t refcnt;
|
|
struct nfs4_lowner *lop;
|
|
struct nfsnode *np;
|
|
char stateid[NFSX_V4STATEID];
|
|
};
|
|
|
|
#ifdef _KERNEL
|
|
|
|
#ifdef MALLOC_DECLARE
|
|
MALLOC_DECLARE(M_NFSREQ);
|
|
MALLOC_DECLARE(M_NFSDIROFF);
|
|
MALLOC_DECLARE(M_NFSBIGFH);
|
|
MALLOC_DECLARE(M_NFSHASH);
|
|
MALLOC_DECLARE(M_NFSDIRECTIO);
|
|
#endif
|
|
|
|
extern struct uma_zone *nfsmount_zone;
|
|
|
|
#ifdef NFS_LEGACYRPC
|
|
extern struct callout nfs_callout;
|
|
#endif
|
|
extern struct nfsstats nfsstats;
|
|
extern struct mtx nfs_iod_mtx;
|
|
|
|
extern int nfs_numasync;
|
|
extern unsigned int nfs_iodmax;
|
|
extern int nfs_pbuf_freecnt;
|
|
extern int nfs_ticks;
|
|
|
|
/* Data constants in XDR form */
|
|
extern u_int32_t nfs_true, nfs_false, nfs_xdrneg1;
|
|
extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers;
|
|
extern u_int32_t rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr;
|
|
|
|
extern int nfsv3_procid[NFS_NPROCS];
|
|
|
|
/*
|
|
* Socket errors ignored for connectionless sockets??
|
|
* For now, ignore them all
|
|
*/
|
|
#define NFSIGNORE_SOERROR(s, e) \
|
|
((e) != EINTR && (e) != EIO && \
|
|
(e) != ERESTART && (e) != EWOULDBLOCK && \
|
|
((s) & PR_CONNREQUIRED) == 0)
|
|
|
|
#ifdef NFS_LEGACYRPC
|
|
|
|
/*
|
|
* Nfs outstanding request list element
|
|
*/
|
|
struct nfsreq {
|
|
TAILQ_ENTRY(nfsreq) r_chain;
|
|
struct mbuf *r_mreq;
|
|
struct mbuf *r_mrep;
|
|
struct mbuf *r_md;
|
|
caddr_t r_dpos;
|
|
struct nfsmount *r_nmp;
|
|
struct vnode *r_vp;
|
|
u_int32_t r_xid;
|
|
int r_flags; /* flags on request, see below */
|
|
int r_retry; /* max retransmission count */
|
|
int r_rexmit; /* current retrans count */
|
|
int r_timer; /* tick counter on reply */
|
|
u_int32_t r_procnum; /* NFS procedure number */
|
|
int r_rtt; /* RTT for rpc */
|
|
int r_lastmsg; /* last tprintf */
|
|
struct thread *r_td; /* Proc that did I/O system call */
|
|
struct mtx r_mtx; /* Protects nfsreq fields */
|
|
};
|
|
|
|
/*
|
|
* Queue head for nfsreq's
|
|
*/
|
|
extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq;
|
|
|
|
/* Flag values for r_flags */
|
|
#define R_TIMING 0x01 /* timing request (in mntp) */
|
|
#define R_SENT 0x02 /* request has been sent */
|
|
#define R_SOFTTERM 0x04 /* soft mnt, too many retries */
|
|
#define R_RESENDERR 0x08 /* Resend failed */
|
|
#define R_SOCKERR 0x10 /* Fatal error on socket */
|
|
#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
|
|
#define R_MUSTRESEND 0x40 /* Must resend request */
|
|
#define R_GETONEREP 0x80 /* Probe for one reply only */
|
|
#define R_PIN_REQ 0x100 /* Pin request down (rexmit in prog or other) */
|
|
|
|
#else
|
|
|
|
/*
|
|
* This is only needed to keep things working while we support
|
|
* compiling for both RPC implementations.
|
|
*/
|
|
struct nfsreq;
|
|
struct nfsmount;
|
|
|
|
#endif
|
|
|
|
struct buf;
|
|
struct socket;
|
|
struct uio;
|
|
struct vattr;
|
|
|
|
/*
|
|
* Pointers to ops that differ from v3 to v4
|
|
*/
|
|
struct nfs_rpcops {
|
|
int (*nr_readrpc)(struct vnode *vp, struct uio *uiop,
|
|
struct ucred *cred);
|
|
int (*nr_writerpc)(struct vnode *vp, struct uio *uiop,
|
|
struct ucred *cred, int *iomode, int *must_commit);
|
|
int (*nr_writebp)(struct buf *bp, int force, struct thread *td);
|
|
int (*nr_readlinkrpc)(struct vnode *vp, struct uio *uiop,
|
|
struct ucred *cred);
|
|
void (*nr_invaldir)(struct vnode *vp);
|
|
int (*nr_commit)(struct vnode *vp, u_quad_t offset, int cnt,
|
|
struct ucred *cred, struct thread *td);
|
|
};
|
|
|
|
/*
|
|
* Defines for WebNFS
|
|
*/
|
|
|
|
#define WEBNFS_ESC_CHAR '%'
|
|
#define WEBNFS_SPECCHAR_START 0x80
|
|
|
|
#define WEBNFS_NATIVE_CHAR 0x80
|
|
/*
|
|
* ..
|
|
* Possibly more here in the future.
|
|
*/
|
|
|
|
/*
|
|
* Macro for converting escape characters in WebNFS pathnames.
|
|
* Should really be in libkern.
|
|
*/
|
|
|
|
#define HEXTOC(c) \
|
|
((c) >= 'a' ? ((c) - ('a' - 10)) : \
|
|
((c) >= 'A' ? ((c) - ('A' - 10)) : ((c) - '0')))
|
|
#define HEXSTRTOI(p) \
|
|
((HEXTOC(p[0]) << 4) + HEXTOC(p[1]))
|
|
|
|
/* nfs_sigintr() helper, when 'rep' has all we need */
|
|
#define NFS_SIGREP(rep) nfs_sigintr((rep)->r_nmp, (rep), (rep)->r_td)
|
|
|
|
#ifdef NFS_DEBUG
|
|
|
|
extern int nfs_debug;
|
|
#define NFS_DEBUG_ASYNCIO 1 /* asynchronous i/o */
|
|
#define NFS_DEBUG_WG 2 /* server write gathering */
|
|
#define NFS_DEBUG_RC 4 /* server request caching */
|
|
|
|
#define NFS_DPF(cat, args) \
|
|
do { \
|
|
if (nfs_debug & NFS_DEBUG_##cat) printf args; \
|
|
} while (0)
|
|
|
|
#else
|
|
|
|
#define NFS_DPF(cat, args)
|
|
|
|
#endif
|
|
|
|
/*
|
|
* On fast networks, the estimator will try to reduce the
|
|
* timeout lower than the latency of the server's disks,
|
|
* which results in too many timeouts, so cap the lower
|
|
* bound.
|
|
*/
|
|
#define NFS_MINRTO (NFS_HZ >> 2)
|
|
|
|
/*
|
|
* Keep the RTO from increasing to unreasonably large values
|
|
* when a server is not responding.
|
|
*/
|
|
#define NFS_MAXRTO (20 * NFS_HZ)
|
|
|
|
enum nfs_rto_timer_t {
|
|
NFS_DEFAULT_TIMER,
|
|
NFS_GETATTR_TIMER,
|
|
NFS_LOOKUP_TIMER,
|
|
NFS_READ_TIMER,
|
|
NFS_WRITE_TIMER,
|
|
};
|
|
#define NFS_MAX_TIMER (NFS_WRITE_TIMER)
|
|
|
|
#define NFS_INITRTT (NFS_HZ << 3)
|
|
|
|
vfs_init_t nfs_init;
|
|
vfs_uninit_t nfs_uninit;
|
|
int nfs_mountroot(struct mount *mp, struct thread *td);
|
|
|
|
#ifdef NFS_LEGACYRPC
|
|
#ifndef NFS4_USE_RPCCLNT
|
|
int nfs_send(struct socket *, struct sockaddr *, struct mbuf *,
|
|
struct nfsreq *);
|
|
int nfs_connect_lock(struct nfsreq *);
|
|
void nfs_connect_unlock(struct nfsreq *);
|
|
void nfs_up(struct nfsreq *, struct nfsmount *, struct thread *,
|
|
const char *, int);
|
|
void nfs_down(struct nfsreq *, struct nfsmount *, struct thread *,
|
|
const char *, int, int);
|
|
#endif /* ! NFS4_USE_RPCCLNT */
|
|
#endif
|
|
|
|
int nfs_vinvalbuf(struct vnode *, int, struct thread *, int);
|
|
int nfs_readrpc(struct vnode *, struct uio *, struct ucred *);
|
|
int nfs_writerpc(struct vnode *, struct uio *, struct ucred *, int *,
|
|
int *);
|
|
int nfs_commit(struct vnode *vp, u_quad_t offset, int cnt,
|
|
struct ucred *cred, struct thread *td);
|
|
int nfs_readdirrpc(struct vnode *, struct uio *, struct ucred *);
|
|
int nfs_nfsiodnew(void);
|
|
int nfs_asyncio(struct nfsmount *, struct buf *, struct ucred *, struct thread *);
|
|
int nfs_doio(struct vnode *, struct buf *, struct ucred *, struct thread *);
|
|
void nfs_doio_directwrite (struct buf *);
|
|
int nfs_readlinkrpc(struct vnode *, struct uio *, struct ucred *);
|
|
int nfs_sigintr(struct nfsmount *, struct nfsreq *, struct thread *);
|
|
int nfs_readdirplusrpc(struct vnode *, struct uio *, struct ucred *);
|
|
int nfs_request(struct vnode *, struct mbuf *, int, struct thread *,
|
|
struct ucred *, struct mbuf **, struct mbuf **, caddr_t *);
|
|
int nfs_loadattrcache(struct vnode **, struct mbuf **, caddr_t *,
|
|
struct vattr *, int);
|
|
int nfsm_mbuftouio(struct mbuf **, struct uio *, int, caddr_t *);
|
|
void nfs_nhinit(void);
|
|
void nfs_nhuninit(void);
|
|
int nfs_nmcancelreqs(struct nfsmount *);
|
|
void nfs_timer(void*);
|
|
|
|
int nfs_connect(struct nfsmount *, struct nfsreq *);
|
|
void nfs_disconnect(struct nfsmount *);
|
|
void nfs_safedisconnect(struct nfsmount *);
|
|
int nfs_getattrcache(struct vnode *, struct vattr *);
|
|
int nfs_iosize(struct nfsmount *nmp);
|
|
int nfsm_strtmbuf(struct mbuf **, char **, const char *, long);
|
|
int nfs_bioread(struct vnode *, struct uio *, int, struct ucred *);
|
|
int nfsm_uiotombuf(struct uio *, struct mbuf **, int, caddr_t *);
|
|
void nfs_clearcommit(struct mount *);
|
|
int nfs_writebp(struct buf *, int, struct thread *);
|
|
int nfs_fsinfo(struct nfsmount *, struct vnode *, struct ucred *,
|
|
struct thread *);
|
|
int nfs_meta_setsize (struct vnode *, struct ucred *,
|
|
struct thread *, u_quad_t);
|
|
|
|
void nfs_set_sigmask(struct thread *td, sigset_t *oldset);
|
|
void nfs_restore_sigmask(struct thread *td, sigset_t *set);
|
|
int nfs_msleep(struct thread *td, void *ident, struct mtx *mtx,
|
|
int priority, char *wmesg, int timo);
|
|
|
|
#endif /* _KERNEL */
|
|
|
|
#endif
|