add in-kernel ttcp performance tool

This commit is contained in:
sam 2003-11-13 00:30:27 +00:00
parent 9d88fdb92b
commit 0f82ac37b7
7 changed files with 1196 additions and 0 deletions

View File

@ -28,6 +28,7 @@ kdrv KernelDriver; add/list/remove third-party kernel driver
kerncruft Shellscript to find orphaned *.c files in /sys
kerninclude Shellscript to find unused #includes in the kernel.
kernxref Shellscript to cross reference symbols in the LINT kernel.
kttcp An in-kernel version of the ttcp network performance tool
mid Create a Message-ID database for mailing lists.
pciid Generate src/share/misc/pci_vendors.
portsinfo Generate list of new ports for last two weeks.

View File

@ -0,0 +1,23 @@
# $FreeBSD$
SHELL= /bin/sh
PROG= kttcp
SRCS= kttcp.c
BINDIR= /usr/local/bin
SYSDIR= /usr/src/sys
CFLAGS += -I${SYSDIR} -Isys
all: kttcp module
module:
cd sys; SYSDIR=${SYSDIR} make
install:
install kttcp ${DESTDIR}/${BINDIR}
cd sys; SYSDIR=${SYSDIR} make install
clean:
rm -f ${PROG}
cd sys; SYSDIR=${SYSDIR} make clean

24
tools/tools/kttcp/README Normal file
View File

@ -0,0 +1,24 @@
$FreeBSD$
This is a port of Jason Thorpe's kttcp tool for testing network
performance for in-kernel applications (like NFS). The tool consists
of a loadable module and a small user-mode application. Beware
that you should match the kernel module to the kernel it is to be
used with. By default SYSDIR is set to /usr/src/sys in Makefile.
You may want to change that.
To use the tool do something like on each of two machines:
1. make
2. su; make install (installs module and kttcp in /usr/local/bin)
3. kldload kttcp
Then:
4. kttcp -r on one machine
5. kttcp -t foo on the other machine, where foo is the
machine where #4 was done.
kttcp w/o arguments gives usage. Otherwise the source is your
friend. Beware that the kernel code must mimic soreceive and sosend
for results to be meaningful.

309
tools/tools/kttcp/kttcp.c Normal file
View File

@ -0,0 +1,309 @@
/* $FreeBSD$ */
/* $NetBSD: kttcp.c,v 1.5 2002/07/11 23:32:35 simonb Exp $ */
/*
* Copyright (c) 2002 Wasabi Systems, Inc.
* All rights reserved.
*
* Written by Frank van der Linden and Jason R. Thorpe
* for Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed for the NetBSD Project by
* Wasabi Systems, Inc.
* 4. The name of Wasabi Systems, Inc. may not be used to endorse
* or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <errno.h>
#include <netdb.h>
#include <unistd.h>
#include <stdio.h>
#include <err.h>
#include <fcntl.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include "dev/kttcp/kttcpio.h"
#define KTTCP_PORT "22222"
#define KTTCP_XMITSIZE (10*1024*1024)
#define KTTCP_SOCKBUF_DEFAULT 65536
#define KTTCP_DEVICE "/dev/kttcp"
static void
usage(void)
{
fprintf(stderr,
"usage: kttcp -r [-b sockbufsize] [-p port] [-q] [-v]\n"
" [-4] [-6]\n"
" kttcp -t [-b sockbufsize] [-n bytes] [-q] [-v] [-p port]\n"
" [-4] [-6] host\n"
);
exit(1);
}
static unsigned long long
get_bytes(const char *str)
{
unsigned long long bytes;
char *cp;
bytes = strtoull(str, &cp, 10);
if (bytes == ULLONG_MAX && errno == ERANGE)
err(1, "%s", str);
if (cp[0] != '\0') {
if (cp[1] != '\0')
errx(1, "invalid byte count: %s", str);
if (cp[0] == 'k' || cp[0] == 'K')
bytes *= 1024;
else if (cp[0] == 'm' || cp[0] == 'M')
bytes *= 1024 * 1024;
else if (cp[0] == 'g' || cp[0] == 'G')
bytes *= 1024 * 1024 * 1024;
else
errx(1, "invalid byte count modifier: %s", str);
}
return (bytes);
}
int
main(int argc, char *argv[])
{
int c, error, s, verbose, s2, kfd;
int xmitset, family;
int bufsize;
int ai_flag;
char *host;
char *portstr;
struct kttcp_io_args kio;
struct addrinfo hints, *addr, *res;
struct sockaddr_storage ss;
struct rusage rustart, ruend;
struct timeval tvtmp;
unsigned long long ull, usecs, bytespersec, bitspersec, xmitsize;
char connecthost[NI_MAXHOST];
socklen_t slen;
const int one = 1;
u_long cmd;
cmd = 0;
portstr = KTTCP_PORT;
verbose = 1;
xmitset = 0;
bufsize = KTTCP_SOCKBUF_DEFAULT;
xmitsize = KTTCP_XMITSIZE;
family = PF_UNSPEC;
while ((c = getopt(argc, argv, "46b:n:p:qrtvw:")) != -1) {
switch (c) {
case '4':
if (family != PF_UNSPEC)
usage();
family = PF_INET;
break;
case '6':
if (family != PF_UNSPEC)
usage();
family = PF_INET6;
break;
case 'b':
ull = get_bytes(optarg);
if (ull > INT_MAX)
errx(1,
"invalid socket buffer size: %s\n", optarg);
bufsize = ull;
break;
case 'n':
xmitsize = get_bytes(optarg);
if (xmitsize > KTTCP_MAX_XMIT)
xmitsize = KTTCP_MAX_XMIT;
xmitset = 1;
break;
case 'p':
portstr = optarg;
break;
case 'q':
verbose = 0;
break;
case 'r':
if (cmd != 0)
usage();
cmd = KTTCP_IO_RECV;
break;
case 't':
if (cmd != 0)
usage();
cmd = KTTCP_IO_SEND;
break;
case 'v':
verbose = 2;
break;
case '?':
default:
usage();
}
}
if (cmd == 0)
usage();
argc -= optind;
argv += optind;
if (cmd == KTTCP_IO_SEND) {
if (xmitsize <= 0 || argc < 1)
usage();
host = argv[0];
ai_flag = 0;
} else {
if (xmitset == 0)
xmitsize = KTTCP_MAX_XMIT;
host = NULL;
ai_flag = AI_PASSIVE;
}
if ((kfd = open(KTTCP_DEVICE, O_RDWR, 666)) == -1)
err(2, "open %s", KTTCP_DEVICE);
memset(&hints, 0, sizeof hints);
hints.ai_flags = ai_flag;
hints.ai_socktype = SOCK_STREAM;
hints.ai_family = family;
error = getaddrinfo(host, portstr, &hints, &addr);
if (error != 0)
errx(2, "%s", gai_strerror(error));
s = -1;
for (res = addr; res != NULL; res = res->ai_next) {
s = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
if (s >= 0)
break;
}
if (res == NULL)
err(2, "can't create socket");
printf("kttcp: socket buffer size: %d\n", bufsize);
if (cmd == KTTCP_IO_SEND) {
if (connect(s, res->ai_addr, res->ai_addrlen) < 0)
err(2, "connect");
if (verbose) {
getnameinfo(res->ai_addr, res->ai_addrlen,
connecthost, sizeof connecthost, NULL, 0,
NI_NUMERICHOST);
printf("kttcp: connected to %s\n", connecthost);
}
if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof (int))
< 0)
err(2, "setsockopt sndbuf");
kio.kio_socket = s;
} else {
if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one,
sizeof (int)) < 0)
err(2, "setsockopt reuseaddr");
if (bind(s, res->ai_addr, res->ai_addrlen) < 0)
err(2, "bind");
if (listen(s, 1) < 0)
err(2, "listen");
if (verbose)
printf("kttcp: listening on port %s\n", portstr);
slen = sizeof ss;
s2 = accept(s, (struct sockaddr *)&ss, &slen);
if (s2 < 0)
err(2, "accept");
if (verbose) {
getnameinfo((struct sockaddr *)&ss, ss.ss_len,
connecthost, sizeof connecthost, NULL, 0,
NI_NUMERICHOST);
printf("kttcp: connect from %s\n", connecthost);
}
if (setsockopt(s2, SOL_SOCKET, SO_RCVBUF, &bufsize,
sizeof (int)) < 0)
err(2, "setsockopt rcvbuf");
kio.kio_socket = s2;
}
kio.kio_totalsize = xmitsize;
getrusage(RUSAGE_SELF, &rustart);
if (ioctl(kfd, cmd, &kio) == -1)
err(2, "kttcp i/o command");
getrusage(RUSAGE_SELF, &ruend);
usecs = (unsigned long long)kio.kio_elapsed.tv_sec * 1000000;
usecs += kio.kio_elapsed.tv_usec;
bytespersec = kio.kio_bytesdone * 1000000LL / usecs;
bitspersec = bytespersec * NBBY;
printf("kttcp: %llu bytes in %ld.%03ld real seconds ==> %llu bytes/sec\n",
kio.kio_bytesdone, kio.kio_elapsed.tv_sec,
kio.kio_elapsed.tv_usec / 1000, bytespersec);
if (verbose > 1) {
timersub(&ruend.ru_stime, &rustart.ru_stime, &tvtmp);
bytespersec = kio.kio_bytesdone * 1000000LL /
(tvtmp.tv_sec * 1000000ULL + tvtmp.tv_usec);
printf("kttcp: %llu bytes in %ld.%03ld CPU seconds ==> %llu bytes/CPU sec\n",
kio.kio_bytesdone, tvtmp.tv_sec, tvtmp.tv_usec / 1000, bytespersec);
}
printf(" %g (%g) Megabits/sec\n",
((double) bitspersec / 1024.0) / 1024.0,
((double) bitspersec / 1000.0) / 1000.0);
timersub(&ruend.ru_utime, &rustart.ru_utime, &tvtmp);
/* XXX
* sometimes, this ends up as -1 * hz!?
*/
if (tvtmp.tv_sec < 0)
tvtmp.tv_sec = tvtmp.tv_usec = 0;
printf(" %ld.%02lduser", tvtmp.tv_sec, tvtmp.tv_usec / 10000);
ull = tvtmp.tv_sec * 1000000ULL + tvtmp.tv_usec;
timersub(&ruend.ru_stime, &rustart.ru_stime, &tvtmp);
printf(" %ld.%02ldsys", tvtmp.tv_sec, tvtmp.tv_usec / 10000);
ull += tvtmp.tv_sec * 1000000ULL + tvtmp.tv_usec;
printf(" %lld.%lldreal", usecs / 1000000, (usecs % 1000000) / 10000);
printf(" %lld%%", ull * 100 / usecs);
printf("\n");
close(kio.kio_socket);
if (cmd == KTTCP_IO_RECV)
close(s);
close(kfd);
freeaddrinfo(addr);
return 0;
}

View File

@ -0,0 +1,8 @@
# $FreeBSD$
KMOD = kttcp
SRCS = kttcp.c
SRCS += device_if.h
MFILES = kern/device_if.m
.include <bsd.kmod.mk>

View File

@ -0,0 +1,772 @@
/* $FreeBSD$ */
/* $NetBSD: kttcp.c,v 1.3 2002/07/03 19:36:52 thorpej Exp $ */
/*
* Copyright (c) 2002 Wasabi Systems, Inc.
* All rights reserved.
*
* Written by Frank van der Linden and Jason R. Thorpe for
* Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed for the NetBSD Project by
* Wasabi Systems, Inc.
* 4. The name of Wasabi Systems, Inc. may not be used to endorse
* or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* kttcp.c --
*
* This module provides kernel support for testing network
* throughput from the perspective of the kernel. It is
* similar in spirit to the classic ttcp network benchmark
* program, the main difference being that with kttcp, the
* kernel is the source and sink of the data.
*
* Testing like this is useful for a few reasons:
*
* 1. This allows us to know what kind of performance we can
* expect from network applications that run in the kernel
* space, such as the NFS server or the NFS client. These
* applications don't have to move the data to/from userspace,
* and so benchmark programs which run in userspace don't
* give us an accurate model.
*
* 2. Since data received is just thrown away, the receiver
* is very fast. This can provide better exercise for the
* sender at the other end.
*
* 3. Since the NetBSD kernel currently uses a run-to-completion
* scheduling model, kttcp provides a benchmark model where
* preemption of the benchmark program is not an issue.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/sysctl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/errno.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/protosw.h>
#include <sys/socketvar.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <dev/kttcp/kttcpio.h>
#ifndef timersub
#define timersub(tvp, uvp, vvp) \
do { \
(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \
(vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \
if ((vvp)->tv_usec < 0) { \
(vvp)->tv_sec--; \
(vvp)->tv_usec += 1000000; \
} \
} while (0)
#endif
static int kttcp_send(struct thread *p, struct kttcp_io_args *);
static int kttcp_recv(struct thread *p, struct kttcp_io_args *);
static int kttcp_sosend(struct socket *, unsigned long long,
unsigned long long *, struct thread *, int);
static int kttcp_soreceive(struct socket *, unsigned long long,
unsigned long long *, struct thread *, int *);
static d_open_t kttcpopen;
static d_ioctl_t kttcpioctl;
static struct cdevsw kttcp_cdevsw = {
.d_open = kttcpopen,
.d_ioctl = kttcpioctl,
.d_name = "kttcp",
.d_maj = MAJOR_AUTO,
};
static int
kttcpopen(dev_t dev, int flag, int mode, struct thread *td)
{
/* Always succeeds. */
return (0);
}
static int
kttcpioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
int error;
if ((flag & FWRITE) == 0)
return EPERM;
switch (cmd) {
case KTTCP_IO_SEND:
error = kttcp_send(td, (struct kttcp_io_args *) data);
break;
case KTTCP_IO_RECV:
error = kttcp_recv(td, (struct kttcp_io_args *) data);
break;
default:
return EINVAL;
}
return error;
}
static int
kttcp_send(struct thread *td, struct kttcp_io_args *kio)
{
struct file *fp;
int error;
struct timeval t0, t1;
unsigned long long len = 0;
unsigned long long done;
if (kio->kio_totalsize >= KTTCP_MAX_XMIT)
return EINVAL;
error = fget(td, kio->kio_socket, &fp);
if (error != 0)
return error;
mtx_lock(&Giant);
if ((fp->f_flag & FWRITE) == 0) {
fdrop(fp, td);
mtx_unlock(&Giant);
return EBADF;
}
if (fp->f_type == DTYPE_SOCKET) {
len = kio->kio_totalsize;
microtime(&t0);
do {
error = kttcp_sosend((struct socket *)fp->f_data, len,
&done, td, 0);
len -= done;
} while (error == 0 && len > 0);
microtime(&t1);
} else
error = EFTYPE;
fdrop(fp, td);
mtx_unlock(&Giant);
if (error != 0)
return error;
timersub(&t1, &t0, &kio->kio_elapsed);
kio->kio_bytesdone = kio->kio_totalsize - len;
return 0;
}
static int
kttcp_recv(struct thread *td, struct kttcp_io_args *kio)
{
struct file *fp;
int error;
struct timeval t0, t1;
unsigned long long len = 0;
unsigned long long done;
if (kio->kio_totalsize > KTTCP_MAX_XMIT)
return EINVAL;
error = fget(td, kio->kio_socket, &fp);
if (error != 0)
return error;
mtx_lock(&Giant);
if ((fp->f_flag & FWRITE) == 0) {
fdrop(fp, td);
mtx_unlock(&Giant);
return EBADF;
}
if (fp->f_type == DTYPE_SOCKET) {
len = kio->kio_totalsize;
microtime(&t0);
do {
error = kttcp_soreceive((struct socket *)fp->f_data,
len, &done, td, NULL);
len -= done;
} while (error == 0 && len > 0 && done > 0);
microtime(&t1);
if (error == EPIPE)
error = 0;
} else
error = EFTYPE;
fdrop(fp, td);
mtx_unlock(&Giant);
if (error != 0)
return error;
timersub(&t1, &t0, &kio->kio_elapsed);
kio->kio_bytesdone = kio->kio_totalsize - len;
return 0;
}
#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
* Slightly changed version of sosend()
*/
int
kttcp_sosend(struct socket *so, unsigned long long slen,
unsigned long long *done, struct thread *td, int flags)
{
struct mbuf **mp, *m, *top;
long space, len, mlen;
int error, s, dontroute, atomic;
long long resid;
atomic = sosendallatonce(so);
resid = slen;
top = NULL;
/*
* In theory resid should be unsigned.
* However, space must be signed, as it might be less than 0
* if we over-committed, and we must use a signed comparison
* of space and resid. On the other hand, a negative resid
* causes us to loop sending 0-length segments to the protocol.
*
* Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
* type sockets since that's an error.
*/
if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
error = EINVAL;
goto out;
}
dontroute =
(flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
(so->so_proto->pr_flags & PR_ATOMIC);
if (td)
td->td_proc->p_stats->p_ru.ru_msgsnd++;
#define snderr(errno) { error = errno; splx(s); goto release; }
restart:
error = sblock(&so->so_snd, SBLOCKWAIT(flags));
if (error)
goto out;
do {
s = splnet();
if (so->so_state & SS_CANTSENDMORE)
snderr(EPIPE);
if (so->so_error) {
error = so->so_error;
so->so_error = 0;
splx(s);
goto release;
}
if ((so->so_state & SS_ISCONNECTED) == 0) {
/*
* `sendto' and `sendmsg' is allowed on a connection-
* based socket if it supports implied connect.
* Return ENOTCONN if not connected and no address is
* supplied.
*/
if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
(so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
if ((so->so_state & SS_ISCONFIRMING) == 0 &&
!(resid == 0))
snderr(ENOTCONN);
} else
snderr(EDESTADDRREQ);
}
space = sbspace(&so->so_snd);
if (flags & MSG_OOB)
space += 1024;
if (atomic && resid > so->so_snd.sb_hiwat)
snderr(EMSGSIZE);
if (space < resid && (atomic || space < so->so_snd.sb_lowat)) {
if (so->so_state & SS_NBIO)
snderr(EWOULDBLOCK);
sbunlock(&so->so_snd);
error = sbwait(&so->so_snd);
splx(s);
if (error)
goto out;
goto restart;
}
splx(s);
mp = &top;
do {
do {
if (top == 0) {
MGETHDR(m, M_WAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
goto release;
}
mlen = MHLEN;
m->m_pkthdr.len = 0;
m->m_pkthdr.rcvif = (struct ifnet *)0;
} else {
MGET(m, M_WAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
goto release;
}
mlen = MLEN;
}
if (resid >= MINCLSIZE) {
MCLGET(m, M_WAIT);
if ((m->m_flags & M_EXT) == 0)
goto nopages;
mlen = MCLBYTES;
len = min(min(mlen, resid), space);
} else {
nopages:
len = min(min(mlen, resid), space);
/*
* For datagram protocols, leave room
* for protocol headers in first mbuf.
*/
if (atomic && top == 0 && len < mlen)
MH_ALIGN(m, len);
}
space -= len;
resid -= len;
m->m_len = len;
*mp = m;
top->m_pkthdr.len += len;
if (error)
goto release;
mp = &m->m_next;
if (resid <= 0) {
if (flags & MSG_EOR)
top->m_flags |= M_EOR;
break;
}
} while (space > 0 && atomic);
if (dontroute)
so->so_options |= SO_DONTROUTE;
s = splnet(); /* XXX */
/*
* XXX all the SS_CANTSENDMORE checks previously
* done could be out of date. We could have recieved
* a reset packet in an interrupt or maybe we slept
* while doing page faults in uiomove() etc. We could
* probably recheck again inside the splnet() protection
* here, but there are probably other places that this
* also happens. We must rethink this.
*/
error = (*so->so_proto->pr_usrreqs->pru_send)(so,
(flags & MSG_OOB) ? PRUS_OOB :
/*
* If the user set MSG_EOF, the protocol
* understands this flag and nothing left to
* send then use PRU_SEND_EOF instead of PRU_SEND.
*/
((flags & MSG_EOF) &&
(so->so_proto->pr_flags & PR_IMPLOPCL) &&
(resid <= 0)) ?
PRUS_EOF :
/* If there is more to send set PRUS_MORETOCOME */
(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
top, NULL, NULL, td);
splx(s);
if (dontroute)
so->so_options &= ~SO_DONTROUTE;
top = 0;
mp = &top;
if (error)
goto release;
} while (resid && space > 0);
} while (resid);
release:
sbunlock(&so->so_snd);
out:
if (top)
m_freem(top);
*done = slen - resid;
return (error);
}
int
kttcp_soreceive(struct socket *so, unsigned long long slen,
unsigned long long *done, struct thread *td, int *flagsp)
{
struct mbuf *m, **mp;
int flags, len, error, s, offset;
struct protosw *pr;
struct mbuf *nextrecord;
int moff, type;
long long orig_resid, resid;
pr = so->so_proto;
mp = NULL;
type = 0;
resid = orig_resid = slen;
if (flagsp)
flags = *flagsp &~ MSG_EOR;
else
flags = 0;
if (flags & MSG_OOB) {
m = m_get(M_WAIT, MT_DATA);
if (m == NULL)
return (ENOBUFS);
error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
if (error)
goto bad;
do {
resid -= min(resid, m->m_len);
m = m_free(m);
} while (resid && error == 0 && m);
bad:
if (m)
m_freem(m);
return (error);
}
if (mp)
*mp = (struct mbuf *)0;
if (so->so_state & SS_ISCONFIRMING && resid)
(*pr->pr_usrreqs->pru_rcvd)(so, 0);
restart:
error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
if (error)
return (error);
s = splnet();
m = so->so_rcv.sb_mb;
/*
* If we have less data than requested, block awaiting more
* (subject to any timeout) if:
* 1. the current count is less than the low water mark, or
* 2. MSG_WAITALL is set, and it is possible to do the entire
* receive operation at once if we block (resid <= hiwat).
* 3. MSG_DONTWAIT is not set
* If MSG_WAITALL is set but resid is larger than the receive buffer,
* we have to do the receive in sections, and thus risk returning
* a short count if a timeout or signal occurs after we start.
*/
if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
so->so_rcv.sb_cc < resid) &&
(so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
((flags & MSG_WAITALL) && resid <= so->so_rcv.sb_hiwat)) &&
m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
if (so->so_error) {
if (m)
goto dontblock;
error = so->so_error;
if ((flags & MSG_PEEK) == 0)
so->so_error = 0;
goto release;
}
if (so->so_state & SS_CANTRCVMORE) {
if (m)
goto dontblock;
else
goto release;
}
for (; m; m = m->m_next)
if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
m = so->so_rcv.sb_mb;
goto dontblock;
}
if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
(so->so_proto->pr_flags & PR_CONNREQUIRED)) {
error = ENOTCONN;
goto release;
}
if (resid == 0)
goto release;
if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
error = EWOULDBLOCK;
goto release;
}
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
sbunlock(&so->so_rcv);
error = sbwait(&so->so_rcv);
splx(s);
if (error)
return (error);
goto restart;
}
dontblock:
/*
* On entry here, m points to the first record of the socket buffer.
* While we process the initial mbufs containing address and control
* info, we save a copy of m->m_nextpkt into nextrecord.
*/
if (td)
td->td_proc->p_stats->p_ru.ru_msgrcv++;
KASSERT(m == so->so_rcv.sb_mb, ("receive 1b"));
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
nextrecord = m->m_nextpkt;
if (pr->pr_flags & PR_ADDR) {
KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
orig_resid = 0;
if (flags & MSG_PEEK) {
m = m->m_next;
} else {
sbfree(&so->so_rcv, m);
so->so_rcv.sb_mb = m_free(m);
m = so->so_rcv.sb_mb;
}
}
while (m && m->m_type == MT_CONTROL && error == 0) {
if (flags & MSG_PEEK) {
m = m->m_next;
} else {
sbfree(&so->so_rcv, m);
so->so_rcv.sb_mb = m_free(m);
m = so->so_rcv.sb_mb;
}
}
/*
* If m is non-NULL, we have some data to read. From now on,
* make sure to keep sb_lastrecord consistent when working on
* the last packet on the chain (nextrecord == NULL) and we
* change m->m_nextpkt.
*/
if (m) {
if ((flags & MSG_PEEK) == 0) {
m->m_nextpkt = nextrecord;
/*
* If nextrecord == NULL (this is a single chain),
* then sb_lastrecord may not be valid here if m
* was changed earlier.
*/
if (nextrecord == NULL) {
KASSERT(so->so_rcv.sb_mb == m, ("receive 1c"));
so->so_rcv.sb_lastrecord = m;
}
}
type = m->m_type;
if (type == MT_OOBDATA)
flags |= MSG_OOB;
} else {
if ((flags & MSG_PEEK) == 0) {
KASSERT(so->so_rcv.sb_mb == m, ("receive 1d"));
so->so_rcv.sb_mb = nextrecord;
SB_EMPTY_FIXUP(&so->so_rcv);
}
}
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
moff = 0;
offset = 0;
while (m && resid > 0 && error == 0) {
if (m->m_type == MT_OOBDATA) {
if (type != MT_OOBDATA)
break;
} else if (type == MT_OOBDATA)
break;
else
KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
("receive 3"));
so->so_state &= ~SS_RCVATMARK;
len = resid;
if (so->so_oobmark && len > so->so_oobmark - offset)
len = so->so_oobmark - offset;
if (len > m->m_len - moff)
len = m->m_len - moff;
/*
* If mp is set, just pass back the mbufs.
* Otherwise copy them out via the uio, then free.
* Sockbuf must be consistent here (points to current mbuf,
* it points to next record) when we drop priority;
* we must note any additions to the sockbuf when we
* block interrupts again.
*/
resid -= len;
if (len == m->m_len - moff) {
if (m->m_flags & M_EOR)
flags |= MSG_EOR;
if (flags & MSG_PEEK) {
m = m->m_next;
moff = 0;
} else {
nextrecord = m->m_nextpkt;
sbfree(&so->so_rcv, m);
if (mp) {
*mp = m;
mp = &m->m_next;
so->so_rcv.sb_mb = m = m->m_next;
*mp = (struct mbuf *)0;
} else {
so->so_rcv.sb_mb = m = m_free(m);
}
/*
* If m != NULL, we also know that
* so->so_rcv.sb_mb != NULL.
*/
KASSERT(so->so_rcv.sb_mb == m, ("receive 3a"));
if (m) {
m->m_nextpkt = nextrecord;
if (nextrecord == NULL)
so->so_rcv.sb_lastrecord = m;
} else {
so->so_rcv.sb_mb = nextrecord;
SB_EMPTY_FIXUP(&so->so_rcv);
}
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
}
} else {
if (flags & MSG_PEEK)
moff += len;
else {
if (mp)
*mp = m_copym(m, 0, len, M_WAIT);
m->m_data += len;
m->m_len -= len;
so->so_rcv.sb_cc -= len;
}
}
if (so->so_oobmark) {
if ((flags & MSG_PEEK) == 0) {
so->so_oobmark -= len;
if (so->so_oobmark == 0) {
so->so_state |= SS_RCVATMARK;
break;
}
} else {
offset += len;
if (offset == so->so_oobmark)
break;
}
}
if (flags & MSG_EOR)
break;
/*
* If the MSG_WAITALL flag is set (for non-atomic socket),
* we must not quit until "uio->uio_resid == 0" or an error
* termination. If a signal/timeout occurs, return
* with a short count but without error.
* Keep sockbuf locked against other readers.
*/
while (flags & MSG_WAITALL && m == 0 && resid > 0 &&
!sosendallatonce(so) && !nextrecord) {
if (so->so_error || so->so_state & SS_CANTRCVMORE)
break;
/*
* The window might have closed to zero, make
* sure we send an ack now that we've drained
* the buffer or we might end up blocking until
* the idle takes over (5 seconds).
*/
if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
(*pr->pr_usrreqs->pru_rcvd)(so, flags);
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
error = sbwait(&so->so_rcv);
if (error) {
sbunlock(&so->so_rcv);
splx(s);
return (0);
}
m = so->so_rcv.sb_mb;
if (m)
nextrecord = m->m_nextpkt;
}
}
if (m && pr->pr_flags & PR_ATOMIC) {
flags |= MSG_TRUNC;
if ((flags & MSG_PEEK) == 0)
(void) sbdroprecord(&so->so_rcv);
}
if ((flags & MSG_PEEK) == 0) {
if (m == 0) {
/*
* First part is an inline SB_EMPTY_FIXUP(). Second
* part makes sure sb_lastrecord is up-to-date if
* there is still data in the socket buffer.
*/
so->so_rcv.sb_mb = nextrecord;
if (so->so_rcv.sb_mb == NULL) {
so->so_rcv.sb_mbtail = NULL;
so->so_rcv.sb_lastrecord = NULL;
} else if (nextrecord->m_nextpkt == NULL)
so->so_rcv.sb_lastrecord = nextrecord;
}
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
(*pr->pr_usrreqs->pru_rcvd)(so, flags);
}
if (orig_resid == resid && orig_resid &&
(flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
sbunlock(&so->so_rcv);
splx(s);
goto restart;
}
if (flagsp)
*flagsp |= flags;
release:
sbunlock(&so->so_rcv);
splx(s);
*done = slen - resid;
#if 0
printf("soreceive: error %d slen %llu resid %lld\n", error, slen, resid);
#endif
return (error);
}
static dev_t kttcp_dev;
/*
* Initialization code, both for static and dynamic loading.
*/
static int
kttcpdev_modevent(module_t mod, int type, void *unused)
{
switch (type) {
case MOD_LOAD:
kttcp_dev = make_dev(&kttcp_cdevsw, 0,
UID_ROOT, GID_WHEEL, 0666,
"kttcp");
return 0;
case MOD_UNLOAD:
/*XXX disallow if active sessions */
destroy_dev(kttcp_dev);
return 0;
}
return EINVAL;
}
static moduledata_t kttcpdev_mod = {
"kttcpdev",
kttcpdev_modevent,
0
};
MODULE_VERSION(kttcpdev, 1);
DECLARE_MODULE(kttcpdev, kttcpdev_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);

View File

@ -0,0 +1,59 @@
/* $FreeBSD$ */
/* $NetBSD$ */
/*
* Copyright (c) 2002 Wasabi Systems, Inc.
* All rights reserved.
*
* Written by Frank van der Linden and Jason R. Thorpe for
* Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed for the NetBSD Project by
* Wasabi Systems, Inc.
* 4. The name of Wasabi Systems, Inc. may not be used to endorse
* or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _DEV_KTTCPIO_H_
#define _DEV_KTTCPIO_H_
#include <sys/ioccom.h>
#include <sys/time.h>
struct kttcp_io_args {
unsigned long long kio_totalsize;/* i/o total size (IN) */
unsigned long long kio_bytesdone;/* i/o actually completed (OUT) */
struct timeval kio_elapsed; /* elapsed time (OUT) */
int kio_socket; /* socket to use for i/o (IN) */
int kio_protovers; /* KTTCP protocol version */
};
#define KTTCP_IO_SEND _IOWR('K', 0, struct kttcp_io_args)
#define KTTCP_IO_RECV _IOWR('K', 1, struct kttcp_io_args)
#define KTTCP_MAX_XMIT 0x7fffffffLL /* XXX can't handle > 31 bits */
#endif /* _DEV_KTTCPIO_H_ */