Make kttcp work in -stable (and -current)

- Removed kttcp_sosend() and kttcp_soreceive() in favor of using
  sosend() / soreceive() with UIO_NOCOPY.  The locking changes in the
  socket layer made merging the kttcp_so* and the so* routines a real
  pain; it was a lot easier to just use the stock routines (see the
  sketch after this list).

- Eliminated the KTTCP_MAX_XMIT restriction on send and receive sizes,
  since I encountered no problems with streams larger than INT_MAX
  bytes.  INT_MAX bytes (about 2GB) is only good for roughly four
  seconds at 4Gb/sec ;)

- Removed Giant from send / recv routines.

- Fixed the character device prototypes, cdevsw initialization, etc.,
  to work on 5.x / 6.x.
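
For reference, the heart of the new approach is a uio whose segment
flag is UIO_NOCOPY: uiomove() then skips the actual data copy, so
sosend() / soreceive() exercise the full socket and protocol paths
without touching user memory.  A minimal sketch of the send side (not
the committed code verbatim; the function name, the "chunk" variable,
the explicit uio_iovcnt, and the 64k per-pass size are illustrative):

	/*
	 * Sketch only: drive sosend() with a throwaway UIO_NOCOPY uio.
	 */
	static int
	nocopy_send(struct socket *so, unsigned long long total,
	    struct thread *td)
	{
		struct uio auio;
		struct iovec aiov;
		unsigned long long left = total;
		int chunk, error = 0;

		bzero(&aiov, sizeof(aiov));
		bzero(&auio, sizeof(auio));
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_segflg = UIO_NOCOPY;	/* uiomove() copies no data */

		while (left != 0 && error == 0) {
			chunk = MIN(left, 65536ULL);	/* arbitrary per-pass size */
			aiov.iov_len = chunk;
			auio.uio_resid = chunk;
			auio.uio_offset = 0;
			error = sosend(so, NULL, &auio, NULL, NULL, 0, td);
			left -= auio.uio_offset;	/* bytes handed to the protocol */
		}
		return (error);
	}

The receive side is the same pattern with soreceive(), treating a pass
that moves no bytes as end-of-stream.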

Reviewed by: sam
MFC after: 1 week
gallatin 2004-12-13 20:40:16 +00:00
commit 6144e74fca (parent ac3e7ba526)
2 changed files with 37 additions and 537 deletions
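
For context, the numbers below are driven from userland through the
kttcp character device.  A rough sketch of the send path, assuming the
KTTCP_IO_SEND ioctl and the /dev/kttcp node name (only the kio_socket,
kio_totalsize and kio_elapsed fields are actually visible in the diff
below):

	struct kttcp_io_args kio;
	int kfd;

	kfd = open("/dev/kttcp", O_RDWR);
	if (kfd == -1)
		err(1, "open");
	memset(&kio, 0, sizeof(kio));
	kio.kio_socket = sock;		/* an already-connected TCP socket */
	kio.kio_totalsize = xmitsize;	/* no KTTCP_MAX_XMIT clamp any more */
	if (ioctl(kfd, KTTCP_IO_SEND, &kio) == -1)
		err(1, "KTTCP_IO_SEND");
	/* kio.kio_elapsed now holds the in-kernel elapsed time */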


@@ -147,8 +147,6 @@ main(int argc, char *argv[])
 			break;
 		case 'n':
 			xmitsize = get_bytes(optarg);
-			if (xmitsize > KTTCP_MAX_XMIT)
-				xmitsize = KTTCP_MAX_XMIT;
 			xmitset = 1;
 			break;
 		case 'p':


@@ -82,6 +82,7 @@
 #include <sys/mbuf.h>
 #include <sys/resourcevar.h>
 #include <sys/proc.h>
+#include <sys/module.h>
 
 #include "kttcpio.h"
@@ -99,10 +100,6 @@
 static int kttcp_send(struct thread *p, struct kttcp_io_args *);
 static int kttcp_recv(struct thread *p, struct kttcp_io_args *);
-static int kttcp_sosend(struct socket *, unsigned long long,
-	unsigned long long *, struct thread *, int);
-static int kttcp_soreceive(struct socket *, unsigned long long,
-	unsigned long long *, struct thread *, int *);
 
 static d_open_t kttcpopen;
 static d_ioctl_t kttcpioctl;
@@ -112,17 +109,18 @@ static struct cdevsw kttcp_cdevsw = {
 	.d_ioctl =	kttcpioctl,
 	.d_name =	"kttcp",
-	.d_maj =	MAJOR_AUTO,
+	.d_version =	D_VERSION,
 };
 
 static int
-kttcpopen(dev_t dev, int flag, int mode, struct thread *td)
+kttcpopen(struct cdev *dev, int flag, int mode, struct thread *td)
 {
 	/* Always succeeds. */
 	return (0);
 }
 
 static int
-kttcpioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
+kttcpioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
 {
 	int error;
@@ -145,6 +143,8 @@ kttcpioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
 	return error;
 }
 
+static int nbyte = 65536;
+
 static int
 kttcp_send(struct thread *td, struct kttcp_io_args *kio)
 {
@@ -152,33 +152,38 @@ kttcp_send(struct thread *td, struct kttcp_io_args *kio)
 	int error;
 	struct timeval t0, t1;
 	unsigned long long len = 0;
-	unsigned long long done;
+	struct uio auio;
+	struct iovec aiov;
 
-	if (kio->kio_totalsize >= KTTCP_MAX_XMIT)
-		return EINVAL;
+	bzero(&aiov, sizeof(aiov));
+	bzero(&auio, sizeof(auio));
+	auio.uio_iov = &aiov;
+	auio.uio_segflg = UIO_NOCOPY;
 
 	error = fget(td, kio->kio_socket, &fp);
 	if (error != 0)
 		return error;
-	mtx_lock(&Giant);
 	if ((fp->f_flag & FWRITE) == 0) {
 		fdrop(fp, td);
-		mtx_unlock(&Giant);
 		return EBADF;
 	}
 	if (fp->f_type == DTYPE_SOCKET) {
 		len = kio->kio_totalsize;
 		microtime(&t0);
 		do {
-			error = kttcp_sosend((struct socket *)fp->f_data, len,
-			    &done, td, 0);
-			len -= done;
-		} while (error == 0 && len > 0);
+			nbyte = MIN(len, (unsigned long long)nbyte);
+			aiov.iov_len = nbyte;
+			auio.uio_resid = nbyte;
+			auio.uio_offset = 0;
+			error = sosend((struct socket *)fp->f_data, NULL,
+			    &auio, NULL, NULL, 0, td);
+			len -= auio.uio_offset;
+		} while (error == 0 && len != 0);
 		microtime(&t1);
 	} else
 		error = EFTYPE;
 	fdrop(fp, td);
-	mtx_unlock(&Giant);
 	if (error != 0)
 		return error;
 	timersub(&t1, &t0, &kio->kio_elapsed);
@@ -195,35 +200,40 @@ kttcp_recv(struct thread *td, struct kttcp_io_args *kio)
 	int error;
 	struct timeval t0, t1;
 	unsigned long long len = 0;
-	unsigned long long done;
+	struct uio auio;
+	struct iovec aiov;
 
-	if (kio->kio_totalsize > KTTCP_MAX_XMIT)
-		return EINVAL;
+	bzero(&aiov, sizeof(aiov));
+	bzero(&auio, sizeof(auio));
+	auio.uio_iov = &aiov;
+	auio.uio_segflg = UIO_NOCOPY;
 
 	error = fget(td, kio->kio_socket, &fp);
 	if (error != 0)
 		return error;
-	mtx_lock(&Giant);
 	if ((fp->f_flag & FWRITE) == 0) {
 		fdrop(fp, td);
-		mtx_unlock(&Giant);
 		return EBADF;
 	}
 	if (fp->f_type == DTYPE_SOCKET) {
 		len = kio->kio_totalsize;
 		microtime(&t0);
 		do {
-			error = kttcp_soreceive((struct socket *)fp->f_data,
-			    len, &done, td, NULL);
-			len -= done;
-		} while (error == 0 && len > 0 && done > 0);
+			nbyte = MIN(len, (unsigned long long)nbyte);
+			aiov.iov_len = nbyte;
+			auio.uio_resid = nbyte;
+			auio.uio_offset = 0;
+			error = soreceive((struct socket *)fp->f_data,
+			    NULL, &auio, NULL, NULL, NULL);
+			len -= auio.uio_offset;
+		} while (error == 0 && len > 0 && auio.uio_offset != 0);
 		microtime(&t1);
 		if (error == EPIPE)
 			error = 0;
 	} else
 		error = EFTYPE;
 	fdrop(fp, td);
-	mtx_unlock(&Giant);
 	if (error != 0)
 		return error;
 	timersub(&t1, &t0, &kio->kio_elapsed);
@@ -233,515 +243,7 @@ kttcp_recv(struct thread *td, struct kttcp_io_args *kio)
 	return 0;
 }
 
-#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
-
-/*
- * Slightly changed version of sosend()
- */
-int
-kttcp_sosend(struct socket *so, unsigned long long slen,
-    unsigned long long *done, struct thread *td, int flags)
-{
-	struct mbuf **mp, *m, *top;
-	long space, len, mlen;
-	int error, s, dontroute, atomic;
-	long long resid;
-
-	atomic = sosendallatonce(so);
-	resid = slen;
-	top = NULL;
-	/*
-	 * In theory resid should be unsigned.
-	 * However, space must be signed, as it might be less than 0
-	 * if we over-committed, and we must use a signed comparison
-	 * of space and resid.  On the other hand, a negative resid
-	 * causes us to loop sending 0-length segments to the protocol.
-	 *
-	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
-	 * type sockets since that's an error.
-	 */
-	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
-		error = EINVAL;
-		goto out;
-	}
-	dontroute =
-	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
-	    (so->so_proto->pr_flags & PR_ATOMIC);
-	if (td)
-		td->td_proc->p_stats->p_ru.ru_msgsnd++;
-#define	snderr(errno)	{ error = errno; splx(s); goto release; }
-
-restart:
-	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
-	if (error)
-		goto out;
-	do {
-		s = splnet();
-		if (so->so_snd.sb_state & SBS_CANTSENDMORE)
-			snderr(EPIPE);
-		if (so->so_error) {
-			error = so->so_error;
-			so->so_error = 0;
-			splx(s);
-			goto release;
-		}
-		if ((so->so_state & SS_ISCONNECTED) == 0) {
-			/*
-			 * `sendto' and `sendmsg' is allowed on a connection-
-			 * based socket if it supports implied connect.
-			 * Return ENOTCONN if not connected and no address is
-			 * supplied.
-			 */
-			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
-			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
-				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
-				    !(resid == 0))
-					snderr(ENOTCONN);
-			} else
-				snderr(EDESTADDRREQ);
-		}
-		space = sbspace(&so->so_snd);
-		if (flags & MSG_OOB)
-			space += 1024;
-		if (atomic && resid > so->so_snd.sb_hiwat)
-			snderr(EMSGSIZE);
-		if (space < resid && (atomic || space < so->so_snd.sb_lowat)) {
-			if (so->so_state & SS_NBIO)
-				snderr(EWOULDBLOCK);
-			sbunlock(&so->so_snd);
-			error = sbwait(&so->so_snd);
-			splx(s);
-			if (error)
-				goto out;
-			goto restart;
-		}
-		splx(s);
-		mp = &top;
-		do {
-			do {
-				if (top == 0) {
-					MGETHDR(m, M_WAIT, MT_DATA);
-					if (m == NULL) {
-						error = ENOBUFS;
-						goto release;
-					}
-					mlen = MHLEN;
-					m->m_pkthdr.len = 0;
-					m->m_pkthdr.rcvif = (struct ifnet *)0;
-				} else {
-					MGET(m, M_WAIT, MT_DATA);
-					if (m == NULL) {
-						error = ENOBUFS;
-						goto release;
-					}
-					mlen = MLEN;
-				}
-				if (resid >= MINCLSIZE) {
-					MCLGET(m, M_WAIT);
-					if ((m->m_flags & M_EXT) == 0)
-						goto nopages;
-					mlen = MCLBYTES;
-					len = min(min(mlen, resid), space);
-				} else {
-nopages:
-					len = min(min(mlen, resid), space);
-					/*
-					 * For datagram protocols, leave room
-					 * for protocol headers in first mbuf.
-					 */
-					if (atomic && top == 0 && len < mlen)
-						MH_ALIGN(m, len);
-				}
-				space -= len;
-				resid -= len;
-				m->m_len = len;
-				*mp = m;
-				top->m_pkthdr.len += len;
-				if (error)
-					goto release;
-				mp = &m->m_next;
-				if (resid <= 0) {
-					if (flags & MSG_EOR)
-						top->m_flags |= M_EOR;
-					break;
-				}
-			} while (space > 0 && atomic);
-			if (dontroute)
-				so->so_options |= SO_DONTROUTE;
-			s = splnet();			/* XXX */
-			/*
-			 * XXX all the SBS_CANTSENDMORE checks previously
-			 * done could be out of date.  We could have recieved
-			 * a reset packet in an interrupt or maybe we slept
-			 * while doing page faults in uiomove() etc. We could
-			 * probably recheck again inside the splnet() protection
-			 * here, but there are probably other places that this
-			 * also happens.  We must rethink this.
-			 */
-			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
-			    (flags & MSG_OOB) ? PRUS_OOB :
-			/*
-			 * If the user set MSG_EOF, the protocol
-			 * understands this flag and nothing left to
-			 * send then use PRU_SEND_EOF instead of PRU_SEND.
-			 */
-			    ((flags & MSG_EOF) &&
-			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
-			     (resid <= 0)) ?
-				PRUS_EOF :
-			/* If there is more to send set PRUS_MORETOCOME */
-			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
-			    top, NULL, NULL, td);
-			splx(s);
-			if (dontroute)
-				so->so_options &= ~SO_DONTROUTE;
-			top = 0;
-			mp = &top;
-			if (error)
-				goto release;
-		} while (resid && space > 0);
-	} while (resid);
-
-release:
-	sbunlock(&so->so_snd);
-out:
-	if (top)
-		m_freem(top);
-	*done = slen - resid;
-	return (error);
-}
-
-int
-kttcp_soreceive(struct socket *so, unsigned long long slen,
-    unsigned long long *done, struct thread *td, int *flagsp)
-{
-	struct mbuf *m, **mp;
-	int flags, len, error, s, offset;
-	struct protosw *pr;
-	struct mbuf *nextrecord;
-	int moff, type;
-	long long orig_resid, resid;
-
-	pr = so->so_proto;
-	mp = NULL;
-	type = 0;
-	resid = orig_resid = slen;
-	if (flagsp)
-		flags = *flagsp &~ MSG_EOR;
-	else
-		flags = 0;
-	if (flags & MSG_OOB) {
-		m = m_get(M_WAIT, MT_DATA);
-		if (m == NULL)
-			return (ENOBUFS);
-		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
-		if (error)
-			goto bad;
-		do {
-			resid -= min(resid, m->m_len);
-			m = m_free(m);
-		} while (resid && error == 0 && m);
-bad:
-		if (m)
-			m_freem(m);
-		return (error);
-	}
-	if (mp)
-		*mp = (struct mbuf *)0;
-	if (so->so_state & SS_ISCONFIRMING && resid)
-		(*pr->pr_usrreqs->pru_rcvd)(so, 0);
-
-restart:
-	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
-	if (error)
-		return (error);
-	s = splnet();
-
-	m = so->so_rcv.sb_mb;
-	/*
-	 * If we have less data than requested, block awaiting more
-	 * (subject to any timeout) if:
-	 *   1. the current count is less than the low water mark, or
-	 *   2. MSG_WAITALL is set, and it is possible to do the entire
-	 *	receive operation at once if we block (resid <= hiwat).
-	 *   3. MSG_DONTWAIT is not set
-	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
-	 * we have to do the receive in sections, and thus risk returning
-	 * a short count if a timeout or signal occurs after we start.
-	 */
-	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
-	    so->so_rcv.sb_cc < resid) &&
-	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
-	    ((flags & MSG_WAITALL) && resid <= so->so_rcv.sb_hiwat)) &&
-	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
-		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
-		if (so->so_error) {
-			if (m)
-				goto dontblock;
-			error = so->so_error;
-			if ((flags & MSG_PEEK) == 0)
-				so->so_error = 0;
-			goto release;
-		}
-		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
-			if (m)
-				goto dontblock;
-			else
-				goto release;
-		}
-		for (; m; m = m->m_next)
-			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
-				m = so->so_rcv.sb_mb;
-				goto dontblock;
-			}
-		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
-		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
-			error = ENOTCONN;
-			goto release;
-		}
-		if (resid == 0)
-			goto release;
-		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
-			error = EWOULDBLOCK;
-			goto release;
-		}
-		SBLASTRECORDCHK(&so->so_rcv);
-		SBLASTMBUFCHK(&so->so_rcv);
-		sbunlock(&so->so_rcv);
-		error = sbwait(&so->so_rcv);
-		splx(s);
-		if (error)
-			return (error);
-		goto restart;
-	}
-dontblock:
-	/*
-	 * On entry here, m points to the first record of the socket buffer.
-	 * While we process the initial mbufs containing address and control
-	 * info, we save a copy of m->m_nextpkt into nextrecord.
-	 */
-	if (td)
-		td->td_proc->p_stats->p_ru.ru_msgrcv++;
-	KASSERT(m == so->so_rcv.sb_mb, ("receive 1b"));
-	SBLASTRECORDCHK(&so->so_rcv);
-	SBLASTMBUFCHK(&so->so_rcv);
-	nextrecord = m->m_nextpkt;
-	if (pr->pr_flags & PR_ADDR) {
-		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
-		orig_resid = 0;
-		if (flags & MSG_PEEK) {
-			m = m->m_next;
-		} else {
-			sbfree(&so->so_rcv, m);
-			so->so_rcv.sb_mb = m_free(m);
-			m = so->so_rcv.sb_mb;
-		}
-	}
-	while (m && m->m_type == MT_CONTROL && error == 0) {
-		if (flags & MSG_PEEK) {
-			m = m->m_next;
-		} else {
-			sbfree(&so->so_rcv, m);
-			so->so_rcv.sb_mb = m_free(m);
-			m = so->so_rcv.sb_mb;
-		}
-	}
-
-	/*
-	 * If m is non-NULL, we have some data to read.  From now on,
-	 * make sure to keep sb_lastrecord consistent when working on
-	 * the last packet on the chain (nextrecord == NULL) and we
-	 * change m->m_nextpkt.
-	 */
-	if (m) {
-		if ((flags & MSG_PEEK) == 0) {
-			m->m_nextpkt = nextrecord;
-			/*
-			 * If nextrecord == NULL (this is a single chain),
-			 * then sb_lastrecord may not be valid here if m
-			 * was changed earlier.
-			 */
-			if (nextrecord == NULL) {
-				KASSERT(so->so_rcv.sb_mb == m, ("receive 1c"));
-				so->so_rcv.sb_lastrecord = m;
-			}
-		}
-		type = m->m_type;
-		if (type == MT_OOBDATA)
-			flags |= MSG_OOB;
-	} else {
-		if ((flags & MSG_PEEK) == 0) {
-			KASSERT(so->so_rcv.sb_mb == m, ("receive 1d"));
-			so->so_rcv.sb_mb = nextrecord;
-			SB_EMPTY_FIXUP(&so->so_rcv);
-		}
-	}
-	SBLASTRECORDCHK(&so->so_rcv);
-	SBLASTMBUFCHK(&so->so_rcv);
-
-	moff = 0;
-	offset = 0;
-	while (m && resid > 0 && error == 0) {
-		if (m->m_type == MT_OOBDATA) {
-			if (type != MT_OOBDATA)
-				break;
-		} else if (type == MT_OOBDATA)
-			break;
-		else
-			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
-			    ("receive 3"));
-		so->so_rcv.sb_state &= ~SBS_RCVATMARK;
-		len = resid;
-		if (so->so_oobmark && len > so->so_oobmark - offset)
-			len = so->so_oobmark - offset;
-		if (len > m->m_len - moff)
-			len = m->m_len - moff;
-		/*
-		 * If mp is set, just pass back the mbufs.
-		 * Otherwise copy them out via the uio, then free.
-		 * Sockbuf must be consistent here (points to current mbuf,
-		 * it points to next record) when we drop priority;
-		 * we must note any additions to the sockbuf when we
-		 * block interrupts again.
-		 */
-		resid -= len;
-		if (len == m->m_len - moff) {
-			if (m->m_flags & M_EOR)
-				flags |= MSG_EOR;
-			if (flags & MSG_PEEK) {
-				m = m->m_next;
-				moff = 0;
-			} else {
-				nextrecord = m->m_nextpkt;
-				sbfree(&so->so_rcv, m);
-				if (mp) {
-					*mp = m;
-					mp = &m->m_next;
-					so->so_rcv.sb_mb = m = m->m_next;
-					*mp = (struct mbuf *)0;
-				} else {
-					so->so_rcv.sb_mb = m = m_free(m);
-				}
-				/*
-				 * If m != NULL, we also know that
-				 * so->so_rcv.sb_mb != NULL.
-				 */
-				KASSERT(so->so_rcv.sb_mb == m, ("receive 3a"));
-				if (m) {
-					m->m_nextpkt = nextrecord;
-					if (nextrecord == NULL)
-						so->so_rcv.sb_lastrecord = m;
-				} else {
-					so->so_rcv.sb_mb = nextrecord;
-					SB_EMPTY_FIXUP(&so->so_rcv);
-				}
-				SBLASTRECORDCHK(&so->so_rcv);
-				SBLASTMBUFCHK(&so->so_rcv);
-			}
-		} else {
-			if (flags & MSG_PEEK)
-				moff += len;
-			else {
-				if (mp)
-					*mp = m_copym(m, 0, len, M_WAIT);
-				m->m_data += len;
-				m->m_len -= len;
-				so->so_rcv.sb_cc -= len;
-			}
-		}
-		if (so->so_oobmark) {
-			if ((flags & MSG_PEEK) == 0) {
-				so->so_oobmark -= len;
-				if (so->so_oobmark == 0) {
-					so->so_rcv.sb_state |= SBS_RCVATMARK;
-					break;
-				}
-			} else {
-				offset += len;
-				if (offset == so->so_oobmark)
-					break;
-			}
-		}
-		if (flags & MSG_EOR)
-			break;
-		/*
-		 * If the MSG_WAITALL flag is set (for non-atomic socket),
-		 * we must not quit until "uio->uio_resid == 0" or an error
-		 * termination.  If a signal/timeout occurs, return
-		 * with a short count but without error.
-		 * Keep sockbuf locked against other readers.
-		 */
-		while (flags & MSG_WAITALL && m == 0 && resid > 0 &&
-		    !sosendallatonce(so) && !nextrecord) {
-			if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE)
-				break;
-			/*
-			 * The window might have closed to zero, make
-			 * sure we send an ack now that we've drained
-			 * the buffer or we might end up blocking until
-			 * the idle takes over (5 seconds).
-			 */
-			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
-				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
-			SBLASTRECORDCHK(&so->so_rcv);
-			SBLASTMBUFCHK(&so->so_rcv);
-			error = sbwait(&so->so_rcv);
-			if (error) {
-				sbunlock(&so->so_rcv);
-				splx(s);
-				return (0);
-			}
-			m = so->so_rcv.sb_mb;
-			if (m)
-				nextrecord = m->m_nextpkt;
-		}
-	}
-
-	if (m && pr->pr_flags & PR_ATOMIC) {
-		flags |= MSG_TRUNC;
-		if ((flags & MSG_PEEK) == 0)
-			(void) sbdroprecord(&so->so_rcv);
-	}
-	if ((flags & MSG_PEEK) == 0) {
-		if (m == 0) {
-			/*
-			 * First part is an inline SB_EMPTY_FIXUP().  Second
-			 * part makes sure sb_lastrecord is up-to-date if
-			 * there is still data in the socket buffer.
-			 */
-			so->so_rcv.sb_mb = nextrecord;
-			if (so->so_rcv.sb_mb == NULL) {
-				so->so_rcv.sb_mbtail = NULL;
-				so->so_rcv.sb_lastrecord = NULL;
-			} else if (nextrecord->m_nextpkt == NULL)
-				so->so_rcv.sb_lastrecord = nextrecord;
-		}
-		SBLASTRECORDCHK(&so->so_rcv);
-		SBLASTMBUFCHK(&so->so_rcv);
-		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
-			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
-	}
-	if (orig_resid == resid && orig_resid &&
-	    (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
-		sbunlock(&so->so_rcv);
-		splx(s);
-		goto restart;
-	}
-
-	if (flagsp)
-		*flagsp |= flags;
-release:
-	sbunlock(&so->so_rcv);
-	splx(s);
-	*done = slen - resid;
-#if 0
-	printf("soreceive: error %d slen %llu resid %lld\n", error, slen, resid);
-#endif
-	return (error);
-}
-
-static dev_t kttcp_dev;
+static struct cdev *kttcp_dev;
 
 /*
  * Initialization code, both for static and dynamic loading.