freebsd-dev/sys/fs/nfs/nfs_var.h

/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Rick Macklem at The University of Guelph.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* XXX needs <nfs/rpcv2.h> and <nfs/nfs.h> because of typedefs
*/
struct uio;
struct ucred;
struct nfscred;
NFSPROC_T;
struct buf;
struct sockaddr_in;
struct nfs_dlmount;
struct file;
struct nfsmount;
struct socket;
struct nfsreq;
struct nfssockreq;
struct vattr;
struct nameidata;
struct nfsnode;
struct nfsfh;
struct sillyrename;
struct componentname;
struct nfsd_srvargs;
struct nfsrv_descript;
struct nfs_fattr;
union nethostaddr;
struct nfsstate;
struct nfslock;
struct nfsclient;
struct nfslayout;
struct nfsdsession;
struct nfslockconflict;
struct nfsd_idargs;
struct nfsd_clid;
struct nfsusrgrp;
struct nfsclowner;
struct nfsclopen;
struct nfsclopenhead;
struct nfsclclient;
struct nfsclsession;
struct nfscllockowner;
struct nfscllock;
struct nfscldeleg;
struct nfscllayout;
struct nfscldevinfo;
struct nfsv4lock;
struct nfsvattr;
struct nfs_vattr;
struct NFSSVCARGS;
struct nfsdevice;
struct pnfsdsfile;
struct pnfsdsattr;
#ifdef __FreeBSD__
NFS_ACCESS_ARGS;
NFS_OPEN_ARGS;
NFS_GETATTR_ARGS;
NFS_LOOKUP_ARGS;
NFS_READDIR_ARGS;
#endif
/* nfs_nfsdstate.c */
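/*
 * NFSv4 server state handling: client IDs, sessions, open/lock state,
 * delegations, and pNFS layouts/devices.
 */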
int nfsrv_setclient(struct nfsrv_descript *, struct nfsclient **,
nfsquad_t *, nfsquad_t *, NFSPROC_T *);
int nfsrv_getclient(nfsquad_t, int, struct nfsclient **, struct nfsdsession *,
nfsquad_t, uint32_t, struct nfsrv_descript *, NFSPROC_T *);
int nfsrv_destroyclient(nfsquad_t, NFSPROC_T *);
int nfsrv_destroysession(struct nfsrv_descript *, uint8_t *);
int nfsrv_bindconnsess(struct nfsrv_descript *, uint8_t *, int *);
int nfsrv_freestateid(struct nfsrv_descript *, nfsv4stateid_t *, NFSPROC_T *);
int nfsrv_teststateid(struct nfsrv_descript *, nfsv4stateid_t *, NFSPROC_T *);
int nfsrv_adminrevoke(struct nfsd_clid *, NFSPROC_T *);
void nfsrv_dumpclients(struct nfsd_dumpclients *, int);
void nfsrv_dumplocks(vnode_t, struct nfsd_dumplocks *, int, NFSPROC_T *);
int nfsrv_lockctrl(vnode_t, struct nfsstate **,
struct nfslock **, struct nfslockconflict *, nfsquad_t, nfsv4stateid_t *,
struct nfsexstuff *, struct nfsrv_descript *, NFSPROC_T *);
int nfsrv_openctrl(struct nfsrv_descript *, vnode_t,
struct nfsstate **, nfsquad_t, nfsv4stateid_t *, nfsv4stateid_t *,
u_int32_t *, struct nfsexstuff *, NFSPROC_T *, u_quad_t);
int nfsrv_opencheck(nfsquad_t, nfsv4stateid_t *, struct nfsstate *,
vnode_t, struct nfsrv_descript *, NFSPROC_T *, int);
int nfsrv_openupdate(vnode_t, struct nfsstate *, nfsquad_t,
nfsv4stateid_t *, struct nfsrv_descript *, NFSPROC_T *, int *);
int nfsrv_delegupdate(struct nfsrv_descript *, nfsquad_t, nfsv4stateid_t *,
vnode_t, int, struct ucred *, NFSPROC_T *, int *);
int nfsrv_releaselckown(struct nfsstate *, nfsquad_t, NFSPROC_T *);
void nfsrv_zapclient(struct nfsclient *, NFSPROC_T *);
int nfssvc_idname(struct nfsd_idargs *);
void nfsrv_servertimer(void);
int nfsrv_getclientipaddr(struct nfsrv_descript *, struct nfsclient *);
void nfsrv_setupstable(NFSPROC_T *);
void nfsrv_updatestable(NFSPROC_T *);
void nfsrv_writestable(u_char *, int, int, NFSPROC_T *);
void nfsrv_throwawayopens(NFSPROC_T *);
int nfsrv_checkremove(vnode_t, int, NFSPROC_T *);
void nfsd_recalldelegation(vnode_t, NFSPROC_T *);
void nfsd_disabledelegation(vnode_t, NFSPROC_T *);
int nfsrv_checksetattr(vnode_t, struct nfsrv_descript *,
nfsv4stateid_t *, struct nfsvattr *, nfsattrbit_t *, struct nfsexstuff *,
NFSPROC_T *);
int nfsrv_checkgetattr(struct nfsrv_descript *, vnode_t,
struct nfsvattr *, nfsattrbit_t *, NFSPROC_T *);
int nfsrv_nfsuserdport(struct sockaddr *, u_short, NFSPROC_T *);
void nfsrv_nfsuserddelport(void);
void nfsrv_throwawayallstate(NFSPROC_T *);
int nfsrv_checksequence(struct nfsrv_descript *, uint32_t, uint32_t *,
uint32_t *, int, uint32_t *, NFSPROC_T *);
int nfsrv_checkreclaimcomplete(struct nfsrv_descript *, int);
void nfsrv_cache_session(uint8_t *, uint32_t, int, struct mbuf **);
void nfsrv_freeallbackchannel_xprts(void);
int nfsrv_layoutcommit(struct nfsrv_descript *, vnode_t, int, int, uint64_t,
uint64_t, uint64_t, int, struct timespec *, int, nfsv4stateid_t *,
int, char *, int *, uint64_t *, struct ucred *, NFSPROC_T *);
int nfsrv_layoutget(struct nfsrv_descript *, vnode_t, struct nfsexstuff *,
int, int *, uint64_t *, uint64_t *, uint64_t, nfsv4stateid_t *, int, int *,
int *, char *, struct ucred *, NFSPROC_T *);
void nfsrv_flexmirrordel(char *, NFSPROC_T *);
void nfsrv_recalloldlayout(NFSPROC_T *);
int nfsrv_layoutreturn(struct nfsrv_descript *, vnode_t, int, int, uint64_t,
uint64_t, int, int, nfsv4stateid_t *, int, uint32_t *, int *,
struct ucred *, NFSPROC_T *);
int nfsrv_getdevinfo(char *, int, uint32_t *, uint32_t *, int *, char **);
void nfsrv_freeonedevid(struct nfsdevice *);
void nfsrv_freealllayoutsanddevids(void);
void nfsrv_freefilelayouts(fhandle_t *);
int nfsrv_deldsserver(int, char *, NFSPROC_T *);
struct nfsdevice *nfsrv_deldsnmp(int, struct nfsmount *, NFSPROC_T *);
int nfsrv_createdevids(struct nfsd_nfsd_args *, NFSPROC_T *);
int nfsrv_checkdsattr(struct nfsrv_descript *, vnode_t, NFSPROC_T *);
int nfsrv_copymr(vnode_t, vnode_t, vnode_t, struct nfsdevice *,
struct pnfsdsfile *, struct pnfsdsfile *, int, struct ucred *, NFSPROC_T *);
int nfsrv_mdscopymr(char *, char *, char *, char *, int *, char *, NFSPROC_T *,
struct vnode **, struct vnode **, struct pnfsdsfile **, struct nfsdevice **,
struct nfsdevice **);
/* nfs_nfsdserv.c */
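/*
 * Per-operation service routines (nfsrvd_*), one for each NFS RPC or
 * NFSv4 compound operation handled by the server.
 */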
int nfsrvd_access(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_getattr(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_setattr(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_lookup(struct nfsrv_descript *, int,
vnode_t, vnode_t *, fhandle_t *, struct nfsexstuff *);
int nfsrvd_readlink(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_read(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_write(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_create(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_mknod(struct nfsrv_descript *, int,
vnode_t, vnode_t *, fhandle_t *, struct nfsexstuff *);
int nfsrvd_remove(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_rename(struct nfsrv_descript *, int,
vnode_t, vnode_t, struct nfsexstuff *, struct nfsexstuff *);
int nfsrvd_link(struct nfsrv_descript *, int,
vnode_t, vnode_t, struct nfsexstuff *, struct nfsexstuff *);
int nfsrvd_symlink(struct nfsrv_descript *, int,
vnode_t, vnode_t *, fhandle_t *, struct nfsexstuff *);
int nfsrvd_mkdir(struct nfsrv_descript *, int,
vnode_t, vnode_t *, fhandle_t *, struct nfsexstuff *);
int nfsrvd_readdir(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_readdirplus(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_commit(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_statfs(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_fsinfo(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_close(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_delegpurge(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_delegreturn(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_getfh(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_lock(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_lockt(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_locku(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_openconfirm(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_opendowngrade(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_renew(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_secinfo(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_setclientid(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_setclientidcfrm(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_verify(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_open(struct nfsrv_descript *, int,
vnode_t, vnode_t *, fhandle_t *, struct nfsexstuff *);
int nfsrvd_openattr(struct nfsrv_descript *, int,
vnode_t, vnode_t *, fhandle_t *, struct nfsexstuff *);
int nfsrvd_releaselckown(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_pathconf(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_exchangeid(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_createsession(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_sequence(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_reclaimcomplete(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_destroyclientid(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_bindconnsess(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_destroysession(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_freestateid(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_layoutget(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_getdevinfo(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_layoutcommit(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_layoutreturn(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_teststateid(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
int nfsrvd_notsupp(struct nfsrv_descript *, int,
vnode_t, struct nfsexstuff *);
/* nfs_nfsdsocket.c */
void nfsrvd_rephead(struct nfsrv_descript *);
void nfsrvd_dorpc(struct nfsrv_descript *, int, u_char *, int, u_int32_t);
/* nfs_nfsdcache.c */
void nfsrvd_initcache(void);
int nfsrvd_getcache(struct nfsrv_descript *);
struct nfsrvcache *nfsrvd_updatecache(struct nfsrv_descript *);
void nfsrvd_sentcache(struct nfsrvcache *, int, uint32_t);
void nfsrvd_cleancache(void);
void nfsrvd_refcache(struct nfsrvcache *);
void nfsrvd_derefcache(struct nfsrvcache *);
void nfsrvd_delcache(struct nfsrvcache *);
void nfsrc_trimcache(uint64_t, uint32_t, int);
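/*
 * Illustrative sketch (not part of the original header): a rough outline of
 * how the reply cache entry points above are typically driven from the RPC
 * dispatch path.  The argument names, the RC_* return values and the exact
 * call order are assumptions for illustration only; the authoritative usage
 * is in nfs_nfsdkrpc.c and nfs_nfsdsocket.c.
 *
 *	nfsrvd_initcache();			// once, at nfsd startup
 *	...
 *	switch (nfsrvd_getcache(nd)) {		// per incoming request
 *	case RC_REPLY:				// retransmission, reply cached
 *		// resend the cached reply
 *		break;
 *	case RC_DROPIT:				// duplicate still in progress
 *		break;
 *	default:				// RC_DOIT: a new request
 *		nfsrvd_dorpc(nd, isdgram, tag, taglen, minorvers);
 *		rp = nfsrvd_updatecache(nd);	// cache the generated reply
 *		if (rp != NULL)
 *			nfsrvd_sentcache(rp, havelock, seq);
 *		break;
 *	}
 *	nfsrc_trimcache(...);			// called periodically to bound
 *						// the size of the cache
 */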
/* nfs_commonsubs.c */
void nfscl_reqstart(struct nfsrv_descript *, int, struct nfsmount *,
u_int8_t *, int, u_int32_t **, struct nfsclsession *, int, int);
void nfsm_stateidtom(struct nfsrv_descript *, nfsv4stateid_t *, int);
void nfscl_fillsattr(struct nfsrv_descript *, struct vattr *,
vnode_t, int, u_int32_t);
void newnfs_init(void);
int nfsaddr_match(int, union nethostaddr *, NFSSOCKADDR_T);
int nfsaddr2_match(NFSSOCKADDR_T, NFSSOCKADDR_T);
int nfsm_strtom(struct nfsrv_descript *, const char *, int);
int nfsm_mbufuio(struct nfsrv_descript *, struct uio *, int);
int nfsm_fhtom(struct nfsrv_descript *, u_int8_t *, int, int);
int nfsm_advance(struct nfsrv_descript *, int, int);
void *nfsm_dissct(struct nfsrv_descript *, int, int);
void newnfs_trimleading(struct nfsrv_descript *);
void newnfs_trimtrailing(struct nfsrv_descript *, mbuf_t,
caddr_t);
void newnfs_copycred(struct nfscred *, struct ucred *);
void newnfs_copyincred(struct ucred *, struct nfscred *);
int nfsrv_dissectacl(struct nfsrv_descript *, NFSACL_T *, int *,
int *, NFSPROC_T *);
int nfsrv_getattrbits(struct nfsrv_descript *, nfsattrbit_t *, int *,
int *);
int nfsv4_loadattr(struct nfsrv_descript *, vnode_t,
struct nfsvattr *, struct nfsfh **, fhandle_t *, int,
struct nfsv3_pathconf *, struct statfs *, struct nfsstatfs *,
struct nfsfsinfo *, NFSACL_T *,
int, int *, u_int32_t *, u_int32_t *, NFSPROC_T *, struct ucred *);
int nfsv4_lock(struct nfsv4lock *, int, int *, void *, struct mount *);
void nfsv4_unlock(struct nfsv4lock *, int);
void nfsv4_relref(struct nfsv4lock *);
void nfsv4_getref(struct nfsv4lock *, int *, void *, struct mount *);
int nfsv4_getref_nonblock(struct nfsv4lock *);
int nfsv4_testlock(struct nfsv4lock *);
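/*
 * Illustrative sketch (not part of the original header): the usual pattern
 * for the nfsv4_lock()/nfsv4_getref() primitives above.  Exclusive access
 * is obtained by looping on nfsv4_lock() with iwantlock set; shared access
 * only takes a reference.  MUTEX_LOCK()/MUTEX_PTR are placeholders for
 * whatever mutex protects the particular nfsv4lock structure, and the
 * locking around these calls is simplified here.
 *
 *	// exclusive access
 *	MUTEX_LOCK();
 *	do {
 *		igotlock = nfsv4_lock(&lck, 1, NULL, MUTEX_PTR, NULL);
 *	} while (!igotlock);
 *	MUTEX_UNLOCK();
 *	// ... modify the protected state ...
 *	nfsv4_unlock(&lck, 0);
 *
 *	// shared (reference counted) access
 *	MUTEX_LOCK();
 *	nfsv4_getref(&lck, NULL, MUTEX_PTR, NULL);
 *	MUTEX_UNLOCK();
 *	// ... read the protected state ...
 *	nfsv4_relref(&lck);
 */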
int nfsrv_mtostr(struct nfsrv_descript *, char *, int);
void nfsrv_cleanusergroup(void);
int nfsrv_checkutf8(u_int8_t *, int);
int newnfs_sndlock(int *);
void newnfs_sndunlock(int *);
int nfsv4_getipaddr(struct nfsrv_descript *, struct sockaddr_in *,
struct sockaddr_in6 *, sa_family_t *, int *);
int nfsv4_seqsession(uint32_t, uint32_t, uint32_t, struct nfsslot *,
struct mbuf **, uint16_t);
void nfsv4_seqsess_cacherep(uint32_t, struct nfsslot *, int, struct mbuf **);
void nfsv4_setsequence(struct nfsmount *, struct nfsrv_descript *,
struct nfsclsession *, int);
int nfsv4_sequencelookup(struct nfsmount *, struct nfsclsession *, int *,
int *, uint32_t *, uint8_t *);
void nfsv4_freeslot(struct nfsclsession *, int);
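/*
 * Illustrative sketch (not part of the original header): how the NFSv4.1
 * session slot helpers above fit together.  Argument names are placeholders
 * and error handling is elided; the authoritative callers are in the client
 * and server RPC code.
 *
 *	// client side, per compound:
 *	nfsv4_setsequence(nmp, nd, sep, flag);	// append the Sequence op,
 *						// reserving a session slot
 *	// ... send the compound and wait for the reply ...
 *	nfsv4_freeslot(sep, slot);		// release the reserved slot
 *
 * On the server side, nfsv4_seqsession() checks the Sequence arguments
 * against the session's slot table (returning a cached reply mbuf chain for
 * a retransmission) and nfsv4_seqsess_cacherep() caches the reply for the
 * slot once it has been built.
 */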
struct ucred *nfsrv_getgrpscred(struct ucred *);
struct nfsdevice *nfsv4_findmirror(struct nfsmount *);
/* nfs_clcomsubs.c */
void nfsm_uiombuf(struct nfsrv_descript *, struct uio *, int);
struct mbuf *nfsm_uiombuflist(struct uio *, int, struct mbuf **, char **);
nfsuint64 *nfscl_getcookie(struct nfsnode *, off_t off, int);
u_int8_t *nfscl_getmyip(struct nfsmount *, struct in6_addr *, int *);
int nfsm_getfh(struct nfsrv_descript *, struct nfsfh **);
int nfscl_mtofh(struct nfsrv_descript *, struct nfsfh **,
struct nfsvattr *, int *);
int nfscl_postop_attr(struct nfsrv_descript *, struct nfsvattr *, int *,
void *);
int nfscl_wcc_data(struct nfsrv_descript *, vnode_t,
struct nfsvattr *, int *, int *, void *);
int nfsm_loadattr(struct nfsrv_descript *, struct nfsvattr *);
int nfscl_request(struct nfsrv_descript *, vnode_t,
NFSPROC_T *, struct ucred *, void *);
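/*
 * Illustrative sketch (not part of the original header): the common shape
 * of a client RPC built with the helpers above, loosely following the
 * nfsrpc_*() functions in nfs_clrpcops.c.  The nfscl_reqstart() argument
 * values and the reply parsing shown are simplified placeholders.
 *
 *	struct nfsrv_descript nfsd, *nd = &nfsd;
 *	int error;
 *
 *	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0);
 *	// ... append procedure-specific arguments with nfsm_*() helpers ...
 *	error = nfscl_request(nd, vp, p, cred, NULL);
 *	if (error == 0 && nd->nd_repstat == 0)
 *		error = nfsm_loadattr(nd, nap);	// parse returned attributes
 *	m_freem(nd->nd_mrep);			// always free the reply chain
 */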
/* nfs_nfsdsubs.c */
void nfsd_fhtovp(struct nfsrv_descript *, struct nfsrvfh *, int,
vnode_t *, struct nfsexstuff *, mount_t *, int);
int nfsd_excred(struct nfsrv_descript *, struct nfsexstuff *, struct ucred *);
int nfsrv_mtofh(struct nfsrv_descript *, struct nfsrvfh *);
int nfsrv_putattrbit(struct nfsrv_descript *, nfsattrbit_t *);
void nfsrv_wcc(struct nfsrv_descript *, int, struct nfsvattr *, int,
struct nfsvattr *);
int nfsv4_fillattr(struct nfsrv_descript *, struct mount *, vnode_t, NFSACL_T *,
struct vattr *, fhandle_t *, int, nfsattrbit_t *,
struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t, struct statfs *);
void nfsrv_fillattr(struct nfsrv_descript *, struct nfsvattr *);
void nfsrv_adj(mbuf_t, int, int);
void nfsrv_postopattr(struct nfsrv_descript *, int, struct nfsvattr *);
int nfsd_errmap(struct nfsrv_descript *);
void nfsv4_uidtostr(uid_t, u_char **, int *);
int nfsv4_strtouid(struct nfsrv_descript *, u_char *, int, uid_t *);
void nfsv4_gidtostr(gid_t, u_char **, int *);
int nfsv4_strtogid(struct nfsrv_descript *, u_char *, int, gid_t *);
int nfsrv_checkuidgid(struct nfsrv_descript *, struct nfsvattr *);
void nfsrv_fixattr(struct nfsrv_descript *, vnode_t,
struct nfsvattr *, NFSACL_T *, NFSPROC_T *, nfsattrbit_t *,
struct nfsexstuff *);
int nfsrv_errmoved(int);
int nfsrv_putreferralattr(struct nfsrv_descript *, nfsattrbit_t *,
struct nfsreferral *, int, int *);
int nfsrv_parsename(struct nfsrv_descript *, char *, u_long *,
NFSPATHLEN_T *);
void nfsd_init(void);
int nfsd_checkrootexp(struct nfsrv_descript *);
void nfsd_getminorvers(struct nfsrv_descript *, u_char *, u_char **, int *,
u_int32_t *);
/* nfs_clvfsops.c */
void nfscl_retopts(struct nfsmount *, char *, size_t);
/* nfs_commonport.c */
int nfsrv_lookupfilename(struct nameidata *, char *, NFSPROC_T *);
void nfsrv_object_create(vnode_t, NFSPROC_T *);
int nfsrv_mallocmget_limit(void);
int nfsvno_v4rootexport(struct nfsrv_descript *);
void newnfs_portinit(void);
struct ucred *newnfs_getcred(void);
void newnfs_setroot(struct ucred *);
int nfs_catnap(int, int, const char *);
struct nfsreferral *nfsv4root_getreferral(vnode_t, vnode_t, u_int32_t);
int nfsvno_pathconf(vnode_t, int, long *, struct ucred *, NFSPROC_T *);
int nfsrv_atroot(vnode_t, uint64_t *);
void newnfs_timer(void *);
int nfs_supportsnfsv4acls(vnode_t);
/* nfs_commonacl.c */
int nfsrv_dissectace(struct nfsrv_descript *, struct acl_entry *,
int *, int *, NFSPROC_T *);
int nfsrv_buildacl(struct nfsrv_descript *, NFSACL_T *, enum vtype,
NFSPROC_T *);
int nfsrv_compareacl(NFSACL_T *, NFSACL_T *);
/* nfs_clrpcops.c */
int nfsrpc_null(vnode_t, struct ucred *, NFSPROC_T *);
int nfsrpc_access(vnode_t, int, struct ucred *, NFSPROC_T *,
struct nfsvattr *, int *);
int nfsrpc_accessrpc(vnode_t, u_int32_t, struct ucred *,
NFSPROC_T *, struct nfsvattr *, int *, u_int32_t *, void *);
int nfsrpc_open(vnode_t, int, struct ucred *, NFSPROC_T *);
int nfsrpc_openrpc(struct nfsmount *, vnode_t, u_int8_t *, int, u_int8_t *, int,
u_int32_t, struct nfsclopen *, u_int8_t *, int, struct nfscldeleg **, int,
u_int32_t, struct ucred *, NFSPROC_T *, int, int);
int nfsrpc_opendowngrade(vnode_t, u_int32_t, struct nfsclopen *,
struct ucred *, NFSPROC_T *);
int nfsrpc_close(vnode_t, int, NFSPROC_T *);
int nfsrpc_closerpc(struct nfsrv_descript *, struct nfsmount *,
struct nfsclopen *, struct ucred *, NFSPROC_T *, int);
int nfsrpc_openconfirm(vnode_t, u_int8_t *, int, struct nfsclopen *,
struct ucred *, NFSPROC_T *);
int nfsrpc_setclient(struct nfsmount *, struct nfsclclient *, int,
struct ucred *, NFSPROC_T *);
int nfsrpc_getattr(vnode_t, struct ucred *, NFSPROC_T *,
struct nfsvattr *, void *);
int nfsrpc_getattrnovp(struct nfsmount *, u_int8_t *, int, int,
struct ucred *, NFSPROC_T *, struct nfsvattr *, u_int64_t *, uint32_t *);
int nfsrpc_setattr(vnode_t, struct vattr *, NFSACL_T *, struct ucred *,
NFSPROC_T *, struct nfsvattr *, int *, void *);
int nfsrpc_lookup(vnode_t, char *, int, struct ucred *, NFSPROC_T *,
struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *,
void *);
int nfsrpc_readlink(vnode_t, struct uio *, struct ucred *,
NFSPROC_T *, struct nfsvattr *, int *, void *);
int nfsrpc_read(vnode_t, struct uio *, struct ucred *, NFSPROC_T *,
struct nfsvattr *, int *, void *);
int nfsrpc_write(vnode_t, struct uio *, int *, int *,
struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *, int);
int nfsrpc_mknod(vnode_t, char *, int, struct vattr *, u_int32_t,
enum vtype, struct ucred *, NFSPROC_T *, struct nfsvattr *,
struct nfsvattr *, struct nfsfh **, int *, int *, void *);
int nfsrpc_create(vnode_t, char *, int, struct vattr *, nfsquad_t,
int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
struct nfsfh **, int *, int *, void *);
int nfsrpc_remove(vnode_t, char *, int, vnode_t, struct ucred *, NFSPROC_T *,
struct nfsvattr *, int *, void *);
int nfsrpc_rename(vnode_t, vnode_t, char *, int, vnode_t, vnode_t, char *, int,
struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
int *, int *, void *, void *);
int nfsrpc_link(vnode_t, vnode_t, char *, int,
struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
int *, int *, void *);
int nfsrpc_symlink(vnode_t, char *, int, const char *, struct vattr *,
struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
struct nfsfh **, int *, int *, void *);
int nfsrpc_mkdir(vnode_t, char *, int, struct vattr *,
struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
struct nfsfh **, int *, int *, void *);
int nfsrpc_rmdir(vnode_t, char *, int, struct ucred *, NFSPROC_T *,
struct nfsvattr *, int *, void *);
int nfsrpc_readdir(vnode_t, struct uio *, nfsuint64 *, struct ucred *,
NFSPROC_T *, struct nfsvattr *, int *, int *, void *);
int nfsrpc_readdirplus(vnode_t, struct uio *, nfsuint64 *,
struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, int *, void *);
int nfsrpc_commit(vnode_t, u_quad_t, int, struct ucred *,
NFSPROC_T *, struct nfsvattr *, int *, void *);
int nfsrpc_advlock(vnode_t, off_t, int, struct flock *, int,
struct ucred *, NFSPROC_T *, void *, int);
int nfsrpc_lockt(struct nfsrv_descript *, vnode_t,
struct nfsclclient *, u_int64_t, u_int64_t, struct flock *,
struct ucred *, NFSPROC_T *, void *, int);
int nfsrpc_lock(struct nfsrv_descript *, struct nfsmount *, vnode_t,
u_int8_t *, int, struct nfscllockowner *, int, int, u_int64_t,
u_int64_t, short, struct ucred *, NFSPROC_T *, int);
int nfsrpc_statfs(vnode_t, struct nfsstatfs *, struct nfsfsinfo *,
struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
int nfsrpc_fsinfo(vnode_t, struct nfsfsinfo *, struct ucred *,
NFSPROC_T *, struct nfsvattr *, int *, void *);
int nfsrpc_pathconf(vnode_t, struct nfsv3_pathconf *,
struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
int nfsrpc_renew(struct nfsclclient *, struct nfsclds *, struct ucred *,
NFSPROC_T *);
int nfsrpc_rellockown(struct nfsmount *, struct nfscllockowner *, uint8_t *,
int, struct ucred *, NFSPROC_T *);
int nfsrpc_getdirpath(struct nfsmount *, u_char *, struct ucred *,
NFSPROC_T *);
int nfsrpc_delegreturn(struct nfscldeleg *, struct ucred *,
struct nfsmount *, NFSPROC_T *, int);
int nfsrpc_getacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *);
int nfsrpc_setacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *);
int nfsrpc_exchangeid(struct nfsmount *, struct nfsclclient *,
struct nfssockreq *, uint32_t, struct nfsclds **, struct ucred *,
NFSPROC_T *);
int nfsrpc_createsession(struct nfsmount *, struct nfsclsession *,
struct nfssockreq *, uint32_t, int, struct ucred *, NFSPROC_T *);
int nfsrpc_destroysession(struct nfsmount *, struct nfsclclient *,
struct ucred *, NFSPROC_T *);
int nfsrpc_destroyclient(struct nfsmount *, struct nfsclclient *,
struct ucred *, NFSPROC_T *);
int nfsrpc_getdeviceinfo(struct nfsmount *, uint8_t *, int, uint32_t *,
struct nfscldevinfo **, struct ucred *, NFSPROC_T *);
int nfsrpc_layoutcommit(struct nfsmount *, uint8_t *, int, int,
uint64_t, uint64_t, uint64_t, nfsv4stateid_t *, int, struct ucred *,
NFSPROC_T *, void *);
int nfsrpc_layoutreturn(struct nfsmount *, uint8_t *, int, int, int, uint32_t,
int, uint64_t, uint64_t, nfsv4stateid_t *, struct ucred *, NFSPROC_T *,
uint32_t, uint32_t, char *);
int nfsrpc_reclaimcomplete(struct nfsmount *, struct ucred *, NFSPROC_T *);
int nfscl_doiods(vnode_t, struct uio *, int *, int *, uint32_t, int,
struct ucred *, NFSPROC_T *);
int nfscl_findlayoutforio(struct nfscllayout *, uint64_t, uint32_t,
struct nfsclflayout **);
void nfscl_freenfsclds(struct nfsclds *);
/* nfs_clstate.c */
int nfscl_open(vnode_t, u_int8_t *, int, u_int32_t, int,
struct ucred *, NFSPROC_T *, struct nfsclowner **, struct nfsclopen **,
int *, int *, int);
int nfscl_getstateid(vnode_t, u_int8_t *, int, u_int32_t, int, struct ucred *,
NFSPROC_T *, nfsv4stateid_t *, void **);
void nfscl_ownerrelease(struct nfsmount *, struct nfsclowner *, int, int, int);
void nfscl_openrelease(struct nfsmount *, struct nfsclopen *, int, int);
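/*
 * Illustrative sketch (not part of the original header): the typical pairing
 * of nfscl_open() with nfscl_openrelease() when the client RPC code needs an
 * open structure for a file handle.  Variable names are placeholders; the
 * authoritative callers are in nfs_clrpcops.c.
 *
 *	struct nfsclowner *owp = NULL;
 *	struct nfsclopen *op = NULL;
 *	int error, newone, ret;
 *
 *	error = nfscl_open(vp, fhp, fhlen, mode, 1, cred, p, &owp, &op,
 *	    &newone, &ret, 1);
 *	if (error != 0)
 *		return (error);
 *	// ... perform the Open (or I/O) RPC using "op", setting error ...
 *	nfscl_openrelease(nmp, op, error, newone);
 */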
int nfscl_getcl(struct mount *, struct ucred *, NFSPROC_T *, int,
struct nfsclclient **);
struct nfsclclient *nfscl_findcl(struct nfsmount *);
void nfscl_clientrelease(struct nfsclclient *);
void nfscl_freelock(struct nfscllock *, int);
void nfscl_freelockowner(struct nfscllockowner *, int);
int nfscl_getbytelock(vnode_t, u_int64_t, u_int64_t, short,
struct ucred *, NFSPROC_T *, struct nfsclclient *, int, void *, int,
u_int8_t *, u_int8_t *, struct nfscllockowner **, int *, int *);
int nfscl_relbytelock(vnode_t, u_int64_t, u_int64_t,
struct ucred *, NFSPROC_T *, int, struct nfsclclient *,
void *, int, struct nfscllockowner **, int *);
int nfscl_checkwritelocked(vnode_t, struct flock *,
struct ucred *, NFSPROC_T *, void *, int);
void nfscl_lockrelease(struct nfscllockowner *, int, int);
void nfscl_fillclid(u_int64_t, char *, u_int8_t *, u_int16_t);
void nfscl_filllockowner(void *, u_int8_t *, int);
void nfscl_freeopen(struct nfsclopen *, int);
void nfscl_umount(struct nfsmount *, NFSPROC_T *);
void nfscl_renewthread(struct nfsclclient *, NFSPROC_T *);
void nfscl_initiate_recovery(struct nfsclclient *);
int nfscl_hasexpired(struct nfsclclient *, u_int32_t, NFSPROC_T *);
void nfscl_dumpstate(struct nfsmount *, int, int, int, int);
void nfscl_dupopen(vnode_t, int);
int nfscl_getclose(vnode_t, struct nfsclclient **);
int nfscl_doclose(vnode_t, struct nfsclclient **, NFSPROC_T *);
void nfsrpc_doclose(struct nfsmount *, struct nfsclopen *, NFSPROC_T *);
int nfscl_deleg(mount_t, struct nfsclclient *, u_int8_t *, int,
struct ucred *, NFSPROC_T *, struct nfscldeleg **);
void nfscl_lockinit(struct nfsv4lock *);
void nfscl_lockexcl(struct nfsv4lock *, void *);
void nfscl_lockunlock(struct nfsv4lock *);
void nfscl_lockderef(struct nfsv4lock *);
void nfscl_docb(struct nfsrv_descript *, NFSPROC_T *);
void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *, void *,
int);
int nfscl_lockt(vnode_t, struct nfsclclient *, u_int64_t,
u_int64_t, struct flock *, NFSPROC_T *, void *, int);
int nfscl_mustflush(vnode_t);
int nfscl_nodeleg(vnode_t, int);
int nfscl_removedeleg(vnode_t, NFSPROC_T *, nfsv4stateid_t *);
int nfscl_getref(struct nfsmount *);
void nfscl_relref(struct nfsmount *);
int nfscl_renamedeleg(vnode_t, nfsv4stateid_t *, int *, vnode_t,
nfsv4stateid_t *, int *, NFSPROC_T *);
void nfscl_reclaimnode(vnode_t);
void nfscl_newnode(vnode_t);
void nfscl_delegmodtime(vnode_t);
void nfscl_deleggetmodtime(vnode_t, struct timespec *);
int nfscl_tryclose(struct nfsclopen *, struct ucred *,
struct nfsmount *, NFSPROC_T *);
void nfscl_cleanup(NFSPROC_T *);
int nfscl_layout(struct nfsmount *, vnode_t, u_int8_t *, int, nfsv4stateid_t *,
int, int, struct nfsclflayouthead *, struct nfscllayout **, struct ucred *,
NFSPROC_T *);
struct nfscllayout *nfscl_getlayout(struct nfsclclient *, uint8_t *, int,
uint64_t, struct nfsclflayout **, int *);
void nfscl_dserr(uint32_t, uint32_t, struct nfscldevinfo *,
struct nfscllayout *, struct nfsclds *);
void nfscl_cancelreqs(struct nfsclds *);
void nfscl_rellayout(struct nfscllayout *, int);
struct nfscldevinfo *nfscl_getdevinfo(struct nfsclclient *, uint8_t *,
struct nfscldevinfo *);
void nfscl_reldevinfo(struct nfscldevinfo *);
int nfscl_adddevinfo(struct nfsmount *, struct nfscldevinfo *, int,
struct nfsclflayout *);
void nfscl_freelayout(struct nfscllayout *);
void nfscl_freeflayout(struct nfsclflayout *);
void nfscl_freedevinfo(struct nfscldevinfo *);
int nfscl_layoutcommit(vnode_t, NFSPROC_T *);
/* nfs_clport.c */
int nfscl_nget(mount_t, vnode_t, struct nfsfh *,
struct componentname *, NFSPROC_T *, struct nfsnode **, void *, int);
NFSPROC_T *nfscl_getparent(NFSPROC_T *);
void nfscl_start_renewthread(struct nfsclclient *);
void nfscl_loadsbinfo(struct nfsmount *, struct nfsstatfs *, void *);
void nfscl_loadfsinfo(struct nfsmount *, struct nfsfsinfo *);
void nfscl_delegreturn(struct nfscldeleg *, int, struct nfsmount *,
struct ucred *, NFSPROC_T *);
void nfsrvd_cbinit(int);
int nfscl_checksattr(struct vattr *, struct nfsvattr *);
int nfscl_ngetreopen(mount_t, u_int8_t *, int, NFSPROC_T *,
struct nfsnode **);
int nfscl_procdoesntexist(u_int8_t *);
int nfscl_maperr(NFSPROC_T *, int, uid_t, gid_t);
/* nfs_clsubs.c */
void nfscl_init(void);
/* nfs_clbio.c */
int ncl_flush(vnode_t, int, NFSPROC_T *, int, int);
/* nfs_clnode.c */
void ncl_invalcaches(vnode_t);
/* nfs_nfsdport.c */
int nfsvno_getattr(vnode_t, struct nfsvattr *, struct nfsrv_descript *,
NFSPROC_T *, int, nfsattrbit_t *);
int nfsvno_setattr(vnode_t, struct nfsvattr *, struct ucred *,
NFSPROC_T *, struct nfsexstuff *);
int nfsvno_getfh(vnode_t, fhandle_t *, NFSPROC_T *);
int nfsvno_accchk(vnode_t, accmode_t, struct ucred *,
struct nfsexstuff *, NFSPROC_T *, int, int, u_int32_t *);
int nfsvno_namei(struct nfsrv_descript *, struct nameidata *,
vnode_t, int, struct nfsexstuff *, NFSPROC_T *, vnode_t *);
void nfsvno_setpathbuf(struct nameidata *, char **, u_long **);
void nfsvno_relpathbuf(struct nameidata *);
int nfsvno_readlink(vnode_t, struct ucred *, NFSPROC_T *, mbuf_t *,
mbuf_t *, int *);
int nfsvno_read(vnode_t, off_t, int, struct ucred *, NFSPROC_T *,
mbuf_t *, mbuf_t *);
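/*
 * Illustrative sketch (not part of the original header): the rough shape of
 * a server-side read, combining nfsd_fhtovp() (declared above under
 * nfs_nfsdsubs.c) with the nfsvno_*() helpers.  The flag values and vnode
 * lock handling are simplified placeholders; the authoritative caller is
 * nfsrvd_read() in nfs_nfsdserv.c.
 *
 *	nfsd_fhtovp(nd, fhp, LK_SHARED, &vp, exp, &mp, 0);
 *	if (nd->nd_repstat == 0)
 *		nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp,
 *		    p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL);
 *	if (nd->nd_repstat == 0)
 *		nd->nd_repstat = nfsvno_read(vp, off, cnt, nd->nd_cred, p,
 *		    &mp3, &mpend);
 *	// ... unlock/release the vnode and append (mp3 .. mpend) to the
 *	//     reply mbuf chain ...
 */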
int nfsvno_write(vnode_t, off_t, int, int, int *, mbuf_t,
char *, struct ucred *, NFSPROC_T *);
int nfsvno_createsub(struct nfsrv_descript *, struct nameidata *,
vnode_t *, struct nfsvattr *, int *, int32_t *, NFSDEV_T,
struct nfsexstuff *);
int nfsvno_mknod(struct nameidata *, struct nfsvattr *, struct ucred *,
NFSPROC_T *);
int nfsvno_mkdir(struct nameidata *,
struct nfsvattr *, uid_t, struct ucred *, NFSPROC_T *,
struct nfsexstuff *);
int nfsvno_symlink(struct nameidata *, struct nfsvattr *, char *, int, int,
uid_t, struct ucred *, NFSPROC_T *, struct nfsexstuff *);
int nfsvno_getsymlink(struct nfsrv_descript *, struct nfsvattr *,
NFSPROC_T *, char **, int *);
int nfsvno_removesub(struct nameidata *, int, struct ucred *, NFSPROC_T *,
struct nfsexstuff *);
int nfsvno_rmdirsub(struct nameidata *, int, struct ucred *, NFSPROC_T *,
struct nfsexstuff *);
int nfsvno_rename(struct nameidata *, struct nameidata *, u_int32_t,
u_int32_t, struct ucred *, NFSPROC_T *);
int nfsvno_link(struct nameidata *, vnode_t, struct ucred *,
NFSPROC_T *, struct nfsexstuff *);
int nfsvno_fsync(vnode_t, u_int64_t, int, struct ucred *, NFSPROC_T *);
int nfsvno_statfs(vnode_t, struct statfs *);
void nfsvno_getfs(struct nfsfsinfo *, int);
void nfsvno_open(struct nfsrv_descript *, struct nameidata *, nfsquad_t,
nfsv4stateid_t *, struct nfsstate *, int *, struct nfsvattr *, int32_t *,
int, NFSACL_T *, nfsattrbit_t *, struct ucred *,
struct nfsexstuff *, vnode_t *);
int nfsvno_updfilerev(vnode_t, struct nfsvattr *, struct nfsrv_descript *,
NFSPROC_T *);
int nfsvno_fillattr(struct nfsrv_descript *, struct mount *, vnode_t,
struct nfsvattr *, fhandle_t *, int, nfsattrbit_t *,
struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t);
int nfsrv_sattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, nfsattrbit_t *,
NFSACL_T *, NFSPROC_T *);
int nfsv4_sattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, nfsattrbit_t *,
NFSACL_T *, NFSPROC_T *);
int nfsvno_checkexp(mount_t, NFSSOCKADDR_T, struct nfsexstuff *,
struct ucred **);
int nfsvno_fhtovp(mount_t, fhandle_t *, NFSSOCKADDR_T, int,
vnode_t *, struct nfsexstuff *, struct ucred **);
vnode_t nfsvno_getvp(fhandle_t *);
int nfsvno_advlock(vnode_t, int, u_int64_t, u_int64_t, NFSPROC_T *);
int nfsrv_v4rootexport(void *, struct ucred *, NFSPROC_T *);
int nfsvno_testexp(struct nfsrv_descript *, struct nfsexstuff *);
uint32_t nfsrv_hashfh(fhandle_t *);
uint32_t nfsrv_hashsessionid(uint8_t *);
void nfsrv_backupstable(void);
int nfsrv_dsgetdevandfh(struct vnode *, NFSPROC_T *, int *, fhandle_t *,
char *);
int nfsrv_dsgetsockmnt(struct vnode *, int, char *, int *, int *,
NFSPROC_T *, struct vnode **, fhandle_t *, char *, char *,
struct vnode **, struct nfsmount **, struct nfsmount *, int *, int *);
int nfsrv_dscreate(struct vnode *, struct vattr *, struct vattr *,
fhandle_t *, struct pnfsdsfile *, struct pnfsdsattr *, char *,
struct ucred *, NFSPROC_T *, struct vnode **);
int nfsrv_updatemdsattr(struct vnode *, struct nfsvattr *, NFSPROC_T *);
void nfsrv_killrpcs(struct nfsmount *);
int nfsrv_setacl(struct vnode *, NFSACL_T *, struct ucred *, NFSPROC_T *);
/* nfs_commonkrpc.c */
int newnfs_nmcancelreqs(struct nfsmount *);
void newnfs_set_sigmask(struct thread *, sigset_t *);
void newnfs_restore_sigmask(struct thread *, sigset_t *);
int newnfs_msleep(struct thread *, void *, struct mtx *, int, char *, int);
int newnfs_request(struct nfsrv_descript *, struct nfsmount *,
struct nfsclient *, struct nfssockreq *, vnode_t, NFSPROC_T *,
struct ucred *, u_int32_t, u_int32_t, u_char *, int, u_int64_t *,
struct nfsclsession *);
int newnfs_connect(struct nfsmount *, struct nfssockreq *,
struct ucred *, NFSPROC_T *, int);
void newnfs_disconnect(struct nfssockreq *);
int newnfs_sigintr(struct nfsmount *, NFSPROC_T *);
/* nfs_nfsdkrpc.c */
int nfsrvd_addsock(struct file *);
int nfsrvd_nfsd(NFSPROC_T *, struct nfsd_nfsd_args *);
void nfsrvd_init(int);
/* nfs_clkrpc.c */
int nfscbd_addsock(struct file *);
int nfscbd_nfsd(NFSPROC_T *, struct nfsd_nfscbd_args *);