Replace the ill-named ZERO_COPY_SOCKETS kernel option with two
more appropriately named kernel options for the very distinct send and receive paths. "options SOCKET_SEND_COW" enables VM page copy-on-write based sending of data on an outbound socket. NB: The COW based send mechanism is not safe and may result in kernel crashes. "options SOCKET_RECV_PFLIP" enables VM kernel/userspace page flipping for special disposable pages attached as external storage to mbufs. Only the naming of the kernel options is changed and their corresponding #ifdef sections are adjusted. No functionality is added or removed. Discussed with: alc (mechanism and limitations of send side COW)
This commit is contained in:
parent
0de9e2cbbd
commit
e37e60c379
@ -964,12 +964,20 @@ options TCP_SIGNATURE #include support for RFC 2385
|
||||
# a smooth scheduling of the traffic.
|
||||
options DUMMYNET
|
||||
|
||||
# Zero copy sockets support. This enables "zero copy" for sending and
|
||||
# receiving data via a socket. The send side works for any type of NIC,
|
||||
# the receive side only works for NICs that support MTUs greater than the
|
||||
# page size of your architecture and that support header splitting. See
|
||||
# zero_copy(9) for more details.
|
||||
options ZERO_COPY_SOCKETS
|
||||
# "Zero copy" sockets support is split into the send and receive paths
|
||||
# which operate very differently.
|
||||
# For the send path the VM page with the data is wired into the kernel
|
||||
# and marked as COW (copy-on-write). If the application touches the
|
||||
# data while it is still in the send socket buffer the page is copied
|
||||
# and divorced from its kernel wiring (no longer zero copy).
|
||||
# The receive side requires explicit NIC driver support to create
|
||||
# disposable pages which are flipped from kernel to user-space VM.
|
||||
# See zero_copy(9) for more details.
|
||||
# XXX: The COW based send mechanism is not safe and may result in
|
||||
# kernel crashes.
|
||||
# XXX: None of the current NIC drivers support disposable pages.
|
||||
options SOCKET_SEND_COW
|
||||
options SOCKET_RECV_PFLIP
|
||||
|
||||
#####################################################################
|
||||
# FILESYSTEM OPTIONS
|
||||
|
@ -520,7 +520,8 @@ NGATM_CCATM opt_netgraph.h
|
||||
# DRM options
|
||||
DRM_DEBUG opt_drm.h
|
||||
|
||||
ZERO_COPY_SOCKETS opt_zero.h
|
||||
SOCKET_SEND_COW opt_zero.h
|
||||
SOCKET_RECV_PFLIP opt_zero.h
|
||||
TI_SF_BUF_JUMBO opt_ti.h
|
||||
TI_JUMBO_HDRSPLIT opt_ti.h
|
||||
BCE_JUMBO_HDRSPLIT opt_bce.h
|
||||
|
@ -57,7 +57,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_map.h>
|
||||
#ifdef ZERO_COPY_SOCKETS
|
||||
#ifdef SOCKET_SEND_COW
|
||||
#include <vm/vm_object.h>
|
||||
#endif
|
||||
|
||||
@ -66,7 +66,7 @@ SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
|
||||
|
||||
static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault);
|
||||
|
||||
#ifdef ZERO_COPY_SOCKETS
|
||||
#ifdef SOCKET_SEND_COW
|
||||
/* Declared in uipc_socket.c */
|
||||
extern int so_zero_copy_receive;
|
||||
|
||||
@ -128,7 +128,7 @@ retry:
|
||||
vm_map_lookup_done(map, entry);
|
||||
return(KERN_SUCCESS);
|
||||
}
|
||||
#endif /* ZERO_COPY_SOCKETS */
|
||||
#endif /* SOCKET_SEND_COW */
|
||||
|
||||
int
|
||||
copyin_nofault(const void *udaddr, void *kaddr, size_t len)
|
||||
@ -261,7 +261,7 @@ uiomove_frombuf(void *buf, int buflen, struct uio *uio)
|
||||
return (uiomove((char *)buf + offset, n, uio));
|
||||
}
|
||||
|
||||
#ifdef ZERO_COPY_SOCKETS
|
||||
#ifdef SOCKET_RECV_PFLIP
|
||||
/*
|
||||
* Experimental support for zero-copy I/O
|
||||
*/
|
||||
@ -356,7 +356,7 @@ uiomoveco(void *cp, int n, struct uio *uio, int disposable)
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
#endif /* ZERO_COPY_SOCKETS */
|
||||
#endif /* SOCKET_RECV_PFLIP */
|
||||
|
||||
/*
|
||||
* Give next character to user as result of read.
|
||||
|
@ -219,17 +219,20 @@ static int numopensockets;
|
||||
SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
|
||||
&numopensockets, 0, "Number of open sockets");
|
||||
|
||||
#ifdef ZERO_COPY_SOCKETS
|
||||
/* These aren't static because they're used in other files. */
|
||||
int so_zero_copy_send = 1;
|
||||
int so_zero_copy_receive = 1;
|
||||
#if defined(SOCKET_SEND_COW) || defined(SOCKET_RECV_PFLIP)
|
||||
SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0,
|
||||
"Zero copy controls");
|
||||
#ifdef SOCKET_RECV_PFLIP
|
||||
int so_zero_copy_receive = 1;
|
||||
SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW,
|
||||
&so_zero_copy_receive, 0, "Enable zero copy receive");
|
||||
#endif
|
||||
#ifdef SOCKET_SEND_COW
|
||||
int so_zero_copy_send = 1;
|
||||
SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW,
|
||||
&so_zero_copy_send, 0, "Enable zero copy send");
|
||||
#endif /* ZERO_COPY_SOCKETS */
|
||||
#endif /* SOCKET_SEND_COW */
|
||||
#endif /* SOCKET_SEND_COW || SOCKET_RECV_PFLIP */
|
||||
|
||||
/*
|
||||
* accept_mtx locks down per-socket fields relating to accept queues. See
|
||||
@ -903,7 +906,7 @@ sodisconnect(struct socket *so)
|
||||
return (error);
|
||||
}
|
||||
|
||||
#ifdef ZERO_COPY_SOCKETS
|
||||
#ifdef SOCKET_SEND_COW
|
||||
struct so_zerocopy_stats{
|
||||
int size_ok;
|
||||
int align_ok;
|
||||
@ -1008,7 +1011,7 @@ out:
|
||||
*retmp = top;
|
||||
return (error);
|
||||
}
|
||||
#endif /* ZERO_COPY_SOCKETS */
|
||||
#endif /* SOCKET_SEND_COW */
|
||||
|
||||
#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
|
||||
|
||||
@ -1019,7 +1022,7 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
|
||||
long space;
|
||||
ssize_t resid;
|
||||
int clen = 0, error, dontroute;
|
||||
#ifdef ZERO_COPY_SOCKETS
|
||||
#ifdef SOCKET_SEND_COW
|
||||
int atomic = sosendallatonce(so) || top;
|
||||
#endif
|
||||
|
||||
@ -1104,7 +1107,7 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
|
||||
if (flags & MSG_EOR)
|
||||
top->m_flags |= M_EOR;
|
||||
} else {
|
||||
#ifdef ZERO_COPY_SOCKETS
|
||||
#ifdef SOCKET_SEND_COW
|
||||
error = sosend_copyin(uio, &top, atomic, &space, flags);
|
||||
if (error)
|
||||
goto out;
|
||||
@ -1121,7 +1124,7 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
|
||||
goto out;
|
||||
}
|
||||
space -= resid - uio->uio_resid;
|
||||
#endif
|
||||
#endif /* SOCKET_SEND_COW */
|
||||
resid = uio->uio_resid;
|
||||
}
|
||||
KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
|
||||
@ -1293,7 +1296,7 @@ restart:
|
||||
if (flags & MSG_EOR)
|
||||
top->m_flags |= M_EOR;
|
||||
} else {
|
||||
#ifdef ZERO_COPY_SOCKETS
|
||||
#ifdef SOCKET_SEND_COW
|
||||
error = sosend_copyin(uio, &top, atomic,
|
||||
&space, flags);
|
||||
if (error != 0)
|
||||
@ -1313,7 +1316,7 @@ restart:
|
||||
goto release;
|
||||
}
|
||||
space -= resid - uio->uio_resid;
|
||||
#endif
|
||||
#endif /* SOCKET_SEND_COW */
|
||||
resid = uio->uio_resid;
|
||||
}
|
||||
if (dontroute) {
|
||||
@ -1405,7 +1408,7 @@ soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
|
||||
if (error)
|
||||
goto bad;
|
||||
do {
|
||||
#ifdef ZERO_COPY_SOCKETS
|
||||
#ifdef SOCKET_RECV_PFLIP
|
||||
if (so_zero_copy_receive) {
|
||||
int disposable;
|
||||
|
||||
@ -1419,7 +1422,7 @@ soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
|
||||
min(uio->uio_resid, m->m_len),
|
||||
uio, disposable);
|
||||
} else
|
||||
#endif /* ZERO_COPY_SOCKETS */
|
||||
#endif /* SOCKET_RECV_PFLIP */
|
||||
error = uiomove(mtod(m, void *),
|
||||
(int) min(uio->uio_resid, m->m_len), uio);
|
||||
m = m_free(m);
|
||||
@ -1743,7 +1746,7 @@ dontblock:
|
||||
SBLASTRECORDCHK(&so->so_rcv);
|
||||
SBLASTMBUFCHK(&so->so_rcv);
|
||||
SOCKBUF_UNLOCK(&so->so_rcv);
|
||||
#ifdef ZERO_COPY_SOCKETS
|
||||
#ifdef SOCKET_RECV_PFLIP
|
||||
if (so_zero_copy_receive) {
|
||||
int disposable;
|
||||
|
||||
@ -1757,7 +1760,7 @@ dontblock:
|
||||
(int)len, uio,
|
||||
disposable);
|
||||
} else
|
||||
#endif /* ZERO_COPY_SOCKETS */
|
||||
#endif /* SOCKET_RECV_PFLIP */
|
||||
error = uiomove(mtod(m, char *) + moff, (int)len, uio);
|
||||
SOCKBUF_LOCK(&so->so_rcv);
|
||||
if (error) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user