freebsd-nq/sys/nfs/nfs_diskless.c
Matt Macy 4f6c66cc9c UDP: further performance improvements on tx
Cumulative throughput while running 64
  netperf -H $DUT -t UDP_STREAM -- -m 1
on a 2x8x2 SKL went from 1.1Mpps to 2.5Mpps

Single stream throughput increases from 910kpps to 1.18Mpps

Baseline:
https://people.freebsd.org/~mmacy/2018.05.11/udpsender2.svg

- Protect read access to global ifnet list with epoch
https://people.freebsd.org/~mmacy/2018.05.11/udpsender3.svg

- Protect short lived ifaddr references with epoch
https://people.freebsd.org/~mmacy/2018.05.11/udpsender4.svg

- Convert if_afdata read lock path to epoch
https://people.freebsd.org/~mmacy/2018.05.11/udpsender5.svg

A fix for the inpcbhash contention is pending sufficient time
on a canary at LLNW.

Reviewed by:	gallatin
Sponsored by:	Limelight Networks
Differential Revision:	https://reviews.freebsd.org/D15409
2018-05-23 21:02:14 +00:00

440 lines
12 KiB
C

/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* William Jolitz.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)autoconf.c 7.1 (Berkeley) 5/9/91
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_bootp.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/ethernet.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <nfs/nfsproto.h>
#include <nfsclient/nfs.h>
#include <nfs/nfsdiskless.h>
/*
 * Upper bound on how long nfs_setup_diskless() keeps re-scanning the
 * interface list for the boot interface before giving up.
 */
#define NFS_IFACE_TIMEOUT_SECS 10 /* Timeout for interface to appear. */
/*
 * Helpers that decode kernel environment variables (named by 'ev') into
 * binary form.  The two *_to_sockaddr() helpers return 0 on success and
 * non-zero on failure; decode_nfshandle() returns the number of handle
 * bytes decoded, or 0 on failure.
 */
static int inaddr_to_sockaddr(char *ev, struct sockaddr_in *sa);
static int hwaddr_to_sockaddr(char *ev, struct sockaddr_dl *sa);
static int decode_nfshandle(char *ev, u_char *fh, int maxfh);
/*
 * This structure must be filled in by a primary bootstrap or bootstrap
 * server for a diskless/dataless machine. It is initialized below just
 * to ensure that it is allocated to initialized data (.data not .bss).
 */
struct nfs_diskless nfs_diskless = { { { 0 } } };
struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
/*
 * 0 while no diskless configuration is available.  nfs_setup_diskless()
 * sets it to 3 after filling in nfsv3_diskless, or to 1 after filling in
 * nfs_diskless (the non-NFSv3 layout).
 */
int nfs_diskless_valid = 0;
/*
 * Sanity check an rsize/wsize value taken from the mount options.
 *
 * 'v' is the candidate size and 'name' is the option name, used only in
 * the diagnostic.  Returns 1 when the value is acceptable; otherwise
 * prints a message and returns 0 so the caller can ignore the option.
 */
static int
checkrwsize(unsigned long v, const char *name)
{
	/*
	 * The upper bound is 32K because most servers limit the block
	 * size to fit within IPv4's 64K reassembled-packet limit.  The
	 * lower bound is essentially arbitrary.
	 */
	if (v >= 4 && v <= 32 * 1024)
		return (1);

	printf("nfs_parse_options: invalid %s %lu ignored\n", name, v);
	return (0);
}
/*
 * Apply a list of textual mount options to the supplied NFS mount
 * arguments.  Also used by the bootp/dhcp support.
 *
 * 'envopts' is a string of option words separated by any of ':', ';',
 * ',' or ' '; it is not modified (a private copy is tokenized).
 * Recognized words update nd->flags, nd->sotype, nd->rsize and
 * nd->wsize.  Empty words are skipped silently; unknown words are
 * reported and skipped.
 */
void
nfs_parse_options(const char *envopts, struct nfs_args *nd)
{
	static const char separators[] = ":;, ";
	char *copy, *cursor, *word;
	unsigned long size;

	/* strsep() writes into its input, so work on a scratch copy. */
	copy = strdup(envopts, M_TEMP);
	cursor = copy;
	while ((word = strsep(&cursor, separators)) != NULL) {
		if (word[0] == '\0')
			continue;	/* Skip empty options. */

		if (strcmp(word, "soft") == 0) {
			nd->flags |= NFSMNT_SOFT;
		} else if (strcmp(word, "intr") == 0) {
			nd->flags |= NFSMNT_INT;
		} else if (strcmp(word, "conn") == 0) {
			/*
			 * NOTE(review): the option is spelled "conn" yet it
			 * sets NFSMNT_NOCONN -- confirm this is intended.
			 */
			nd->flags |= NFSMNT_NOCONN;
		} else if (strcmp(word, "nolockd") == 0) {
			nd->flags |= NFSMNT_NOLOCKD;
		} else if (strcmp(word, "nocto") == 0) {
			nd->flags |= NFSMNT_NOCTO;
		} else if (strcmp(word, "nfsv2") == 0) {
			nd->flags &= ~(NFSMNT_NFSV3 | NFSMNT_NFSV4);
		} else if (strcmp(word, "nfsv3") == 0) {
			nd->flags &= ~NFSMNT_NFSV4;
			nd->flags |= NFSMNT_NFSV3;
		} else if (strcmp(word, "tcp") == 0) {
			nd->sotype = SOCK_STREAM;
		} else if (strcmp(word, "udp") == 0) {
			nd->sotype = SOCK_DGRAM;
		} else if (strncmp(word, "rsize=", 6) == 0) {
			size = strtoul(word + 6, NULL, 10);
			if (checkrwsize(size, "rsize")) {
				nd->rsize = (int)size;
				nd->flags |= NFSMNT_RSIZE;
			}
		} else if (strncmp(word, "wsize=", 6) == 0) {
			size = strtoul(word + 6, NULL, 10);
			if (checkrwsize(size, "wsize")) {
				nd->wsize = (int)size;
				nd->flags |= NFSMNT_WSIZE;
			}
		} else {
			printf("%s: skipping unknown option \"%s\"\n",
			    __func__, word);
		}
	}
	free(copy, M_TEMP);
}
/*
 * Populate the essential fields in the nfs_diskless or nfsv3_diskless
 * structure from the kernel environment and, on success, record which
 * one is valid in nfs_diskless_valid (3 for nfsv3_diskless, 1 for
 * nfs_diskless).  Does nothing if nfs_diskless_valid is already non-zero.
 *
 * The loader is expected to export the following environment variables:
 *
 * boot.netif.name		name of boot interface
 * boot.netif.ip		IP address on boot interface
 * boot.netif.netmask		netmask on boot interface
 * boot.netif.gateway		default gateway (optional)
 * boot.netif.hwaddr		hardware address of boot interface
 * boot.netif.mtu		interface mtu from bootp/dhcp (optional)
 * boot.nfsroot.server		IP address of root filesystem server
 * boot.nfsroot.path		path of the root filesystem on server
 * boot.nfsroot.nfshandle	NFS handle for root filesystem on server
 * boot.nfsroot.nfshandlelen	and length of this handle (for NFSv3 only)
 * boot.nfsroot.options		NFS options for the root filesystem
 *
 * The presence of boot.nfsroot.nfshandlelen selects the NFSv3 layout.
 * The boot interface is located by matching boot.netif.hwaddr against
 * the link-level addresses of the known interfaces; the scan is retried
 * every hz / 5 ticks for up to NFS_IFACE_TIMEOUT_SECS seconds.  Once the
 * interface is found, boot.netif.name is (re)set to its name.
 *
 * On any failure the function returns early, leaving nfs_diskless_valid
 * untouched; the structures may be partially filled in at that point.
 */
void
nfs_setup_diskless(void)
{
	struct nfs_diskless *nd = &nfs_diskless;
	struct nfsv3_diskless *nd3 = &nfsv3_diskless;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl, ourdl;
	struct sockaddr_in myaddr, netmask;
	char *cp;
	int cnt, fhlen, is_nfsv3;
	uint32_t len;
	time_t timeout_at;

	if (nfs_diskless_valid != 0)
		return;

	/* get handle size. If this succeeds, it's an NFSv3 setup. */
	if ((cp = kern_getenv("boot.nfsroot.nfshandlelen")) != NULL) {
		/* 'len' is unsigned, so scan it with an unsigned conversion. */
		cnt = sscanf(cp, "%u", &len);
		freeenv(cp);
		if (cnt != 1 || len == 0 || len > NFSX_V3FHMAX) {
			printf("nfs_diskless: bad NFS handle len\n");
			return;
		}
		nd3->root_fhsize = len;
		is_nfsv3 = 1;
	} else
		is_nfsv3 = 0;

	/* set up interface */
	if (inaddr_to_sockaddr("boot.netif.ip", &myaddr))
		return;
	if (inaddr_to_sockaddr("boot.netif.netmask", &netmask)) {
		printf("nfs_diskless: no netmask\n");
		return;
	}
	/*
	 * The broadcast address is the interface address with every host
	 * bit (those not covered by the netmask) set.
	 */
	if (is_nfsv3 != 0) {
		bcopy(&myaddr, &nd3->myif.ifra_addr, sizeof(myaddr));
		bcopy(&myaddr, &nd3->myif.ifra_broadaddr, sizeof(myaddr));
		((struct sockaddr_in *)
		    &nd3->myif.ifra_broadaddr)->sin_addr.s_addr =
		    myaddr.sin_addr.s_addr | ~ netmask.sin_addr.s_addr;
		bcopy(&netmask, &nd3->myif.ifra_mask, sizeof(netmask));
	} else {
		bcopy(&myaddr, &nd->myif.ifra_addr, sizeof(myaddr));
		bcopy(&myaddr, &nd->myif.ifra_broadaddr, sizeof(myaddr));
		((struct sockaddr_in *)
		    &nd->myif.ifra_broadaddr)->sin_addr.s_addr =
		    myaddr.sin_addr.s_addr | ~ netmask.sin_addr.s_addr;
		bcopy(&netmask, &nd->myif.ifra_mask, sizeof(netmask));
	}

	if (hwaddr_to_sockaddr("boot.netif.hwaddr", &ourdl)) {
		printf("nfs_diskless: no hardware address\n");
		return;
	}
	ifa = NULL;
	timeout_at = time_uptime + NFS_IFACE_TIMEOUT_SECS;

	/*
	 * Look for an interface whose link-level address matches the one
	 * the loader booted from.  The interface may not have attached
	 * yet, so poll until the timeout expires.
	 */
retry:
	CURVNET_SET(TD_TO_VNET(curthread));
	IFNET_RLOCK();
	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family == AF_LINK) {
				sdl = (struct sockaddr_dl *)ifa->ifa_addr;
				if ((sdl->sdl_type == ourdl.sdl_type) &&
				    (sdl->sdl_alen == ourdl.sdl_alen) &&
				    !bcmp(LLADDR(sdl),
				    LLADDR(&ourdl),
				    sdl->sdl_alen)) {
					IFNET_RUNLOCK();
					CURVNET_RESTORE();
					goto match_done;
				}
			}
		}
	}
	IFNET_RUNLOCK();
	CURVNET_RESTORE();
	if (time_uptime < timeout_at) {
		pause("nfssdl", hz / 5);
		goto retry;
	}
	printf("nfs_diskless: no interface\n");
	return;	/* no matching interface */

match_done:
	/* 'ifp' is the matching interface found by the scan above. */
	kern_setenv("boot.netif.name", ifp->if_xname);
	if (is_nfsv3 != 0) {
		strlcpy(nd3->myif.ifra_name, ifp->if_xname,
		    sizeof(nd3->myif.ifra_name));

		/* set up gateway */
		inaddr_to_sockaddr("boot.netif.gateway", &nd3->mygateway);

		/* set up root mount */
		nd3->root_args.rsize = 32768;		/* XXX tunable? */
		nd3->root_args.wsize = 32768;
		nd3->root_args.sotype = SOCK_STREAM;
		nd3->root_args.flags = (NFSMNT_NFSV3 | NFSMNT_WSIZE |
		    NFSMNT_RSIZE | NFSMNT_RESVPORT);
		if (inaddr_to_sockaddr("boot.nfsroot.server",
		    &nd3->root_saddr)) {
			printf("nfs_diskless: no server\n");
			return;
		}
		nd3->root_saddr.sin_port = htons(NFS_PORT);
		fhlen = decode_nfshandle("boot.nfsroot.nfshandle",
		    &nd3->root_fh[0], NFSX_V3FHMAX);
		if (fhlen == 0) {
			printf("nfs_diskless: no NFS handle\n");
			return;
		}
		/* The decoded handle must match the advertised length. */
		if (fhlen != nd3->root_fhsize) {
			printf("nfs_diskless: bad NFS handle len=%d\n", fhlen);
			return;
		}
		if ((cp = kern_getenv("boot.nfsroot.path")) != NULL) {
			/*
			 * strlcpy() always NUL-terminates, rather than
			 * relying on the destination being pre-zeroed.
			 */
			strlcpy(nd3->root_hostnam, cp, MNAMELEN);
			freeenv(cp);
		}
		if ((cp = kern_getenv("boot.nfsroot.options")) != NULL) {
			nfs_parse_options(cp, &nd3->root_args);
			freeenv(cp);
		}

		nfs_diskless_valid = 3;
	} else {
		strlcpy(nd->myif.ifra_name, ifp->if_xname,
		    sizeof(nd->myif.ifra_name));

		/* set up gateway */
		inaddr_to_sockaddr("boot.netif.gateway", &nd->mygateway);

		/* set up root mount */
		nd->root_args.rsize = 8192;		/* XXX tunable? */
		nd->root_args.wsize = 8192;
		nd->root_args.sotype = SOCK_STREAM;
		nd->root_args.flags = (NFSMNT_WSIZE |
		    NFSMNT_RSIZE | NFSMNT_RESVPORT);
		if (inaddr_to_sockaddr("boot.nfsroot.server",
		    &nd->root_saddr)) {
			printf("nfs_diskless: no server\n");
			return;
		}
		nd->root_saddr.sin_port = htons(NFS_PORT);
		if (decode_nfshandle("boot.nfsroot.nfshandle",
		    &nd->root_fh[0], NFSX_V2FH) == 0) {
			printf("nfs_diskless: no NFS handle\n");
			return;
		}
		if ((cp = kern_getenv("boot.nfsroot.path")) != NULL) {
			/*
			 * strlcpy() always NUL-terminates, rather than
			 * relying on the destination being pre-zeroed.
			 */
			strlcpy(nd->root_hostnam, cp, MNAMELEN);
			freeenv(cp);
		}
		if ((cp = kern_getenv("boot.nfsroot.options")) != NULL) {
			struct nfs_args args;

			/*
			 * XXX yech, convert between old and current
			 * arg format
			 */
			args.flags = nd->root_args.flags;
			args.sotype = nd->root_args.sotype;
			args.rsize = nd->root_args.rsize;
			args.wsize = nd->root_args.wsize;
			nfs_parse_options(cp, &args);
			nd->root_args.flags = args.flags;
			nd->root_args.sotype = args.sotype;
			nd->root_args.rsize = args.rsize;
			nd->root_args.wsize = args.wsize;
			freeenv(cp);
		}

		nfs_diskless_valid = 1;
	}
}
/*
 * Convert the dotted-quad IPv4 address held in kernel environment
 * variable 'ev' into '*sa'.
 *
 * '*sa' is first reset to an all-zero AF_INET sockaddr, so on failure it
 * holds the zero address.  Returns 0 on success, or 1 if the variable is
 * not set, is not of the form a.b.c.d, or has an octet greater than 255.
 */
static int
inaddr_to_sockaddr(char *ev, struct sockaddr_in *sa)
{
	u_int32_t a[4];
	char *cp;
	int count, i;

	bzero(sa, sizeof(*sa));
	sa->sin_len = sizeof(*sa);
	sa->sin_family = AF_INET;

	if ((cp = kern_getenv(ev)) == NULL)
		return (1);

	/* The octets are stored in unsigned words, so scan them unsigned. */
	count = sscanf(cp, "%u.%u.%u.%u", &a[0], &a[1], &a[2], &a[3]);
	freeenv(cp);
	if (count != 4)
		return (1);

	/*
	 * Reject out-of-range octets; otherwise they would spill into the
	 * neighbouring octets when the address is assembled below.
	 */
	for (i = 0; i < 4; i++) {
		if (a[i] > 255)
			return (1);
	}

	sa->sin_addr.s_addr =
	    htonl((a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]);
	return (0);
}
/*
 * Convert the colon-separated hexadecimal Ethernet address held in
 * kernel environment variable 'ev' into the link-level sockaddr '*sa'.
 *
 * '*sa' is first reset and tagged as an AF_LINK / IFT_ETHER address of
 * ETHER_ADDR_LEN bytes; the address bytes are stored at the start of
 * sdl_data.  Returns 0 on success, or 1 if the variable is not set, does
 * not contain six ':'-separated hex fields, or has a field above 0xff.
 */
static int
hwaddr_to_sockaddr(char *ev, struct sockaddr_dl *sa)
{
	char *cp;
	u_int32_t a[6];
	int count, i;

	bzero(sa, sizeof(*sa));
	sa->sdl_len = sizeof(*sa);
	sa->sdl_family = AF_LINK;
	sa->sdl_type = IFT_ETHER;
	sa->sdl_alen = ETHER_ADDR_LEN;

	if ((cp = kern_getenv(ev)) == NULL)
		return (1);
	count = sscanf(cp, "%x:%x:%x:%x:%x:%x",
	    &a[0], &a[1], &a[2], &a[3], &a[4], &a[5]);
	freeenv(cp);
	if (count != 6)
		return (1);

	/* Each field must fit in one byte; do not truncate silently. */
	for (i = 0; i < (int)(sizeof(a) / sizeof(a[0])); i++) {
		if (a[i] > 0xff)
			return (1);
	}
	for (i = 0; i < (int)(sizeof(a) / sizeof(a[0])); i++)
		sa->sdl_data[i] = a[i];
	return (0);
}
/*
 * Return the value of hexadecimal digit 'c', or -1 if it is not one.
 */
static int
nfs_diskless_hexval(char c)
{
	if (c >= '0' && c <= '9')
		return (c - '0');
	if (c >= 'a' && c <= 'f')
		return (c - 'a' + 10);
	if (c >= 'A' && c <= 'F')
		return (c - 'A' + 10);
	return (-1);
}

/*
 * Decode the NFS file handle held in kernel environment variable 'ev'
 * into the buffer 'fh', which has room for 'maxfh' bytes.
 *
 * The expected encoding is 'X', followed by two hexadecimal digits per
 * handle byte, followed by a closing 'X'.  Returns the number of bytes
 * decoded, or 0 if the variable is not set, is malformed, or encodes
 * more than 'maxfh' bytes.  On failure 'fh' may have been partially
 * written, but never beyond 'maxfh' bytes.
 */
static int
decode_nfshandle(char *ev, u_char *fh, int maxfh)
{
	char *env, *cp;
	int hi, lo, len;

	env = kern_getenv(ev);
	if (env == NULL)
		return (0);

	len = 0;
	cp = env;
	if (*cp != 'X')
		goto bad;
	cp++;

	while (*cp != 'X') {
		/* Check for room before storing, not after. */
		if (len >= maxfh)
			goto bad;
		/*
		 * Require two hex digits.  cp[1] is only examined when
		 * cp[0] is a digit (hence not the terminating NUL), so
		 * this never reads past the end of the string.
		 */
		hi = nfs_diskless_hexval(cp[0]);
		if (hi < 0)
			goto bad;
		lo = nfs_diskless_hexval(cp[1]);
		if (lo < 0)
			goto bad;
		fh[len++] = (u_char)((hi << 4) | lo);
		cp += 2;
	}
	freeenv(env);
	return (len);

bad:
	freeenv(env);
	return (0);
}
#if !defined(BOOTP_NFSROOT)
/*
 * Root configuration hook, built only when BOOTP_NFSROOT is not defined.
 * Runs nfs_setup_diskless() and, if that produced a valid diskless
 * configuration, makes "nfs:" the first root device name to try.
 */
static void
nfs_rootconf(void)
{

	nfs_setup_diskless();
	if (nfs_diskless_valid == 0)
		return;
	rootdevnames[0] = "nfs:";
}

SYSINIT(cpu_rootconf, SI_SUB_ROOT_CONF, SI_ORDER_FIRST, nfs_rootconf, NULL);
#endif