Fix the mechanism for choosing whether to save the slow-start threshold

in the route.  This allows us to remove the unconditional setting of the
pipesize in the route, which should mean that SO_SNDBUF and SO_RCVBUF
should actually work again.  While we're at it:

- Convert udp_usrreq from `mondo switch statement from Hell' to new-style.
- Delete old TCP mondo switch statement from Hell, which had previously
  been diked out.
This commit is contained in:
Garrett Wollman 1997-02-14 18:15:53 +00:00
parent 4a26224c8d
commit d0390e0570
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=22719
7 changed files with 267 additions and 486 deletions

View File

@ -105,8 +105,9 @@ struct protosw inetsw[] = {
},
{ SOCK_DGRAM, &inetdomain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR,
udp_input, 0, udp_ctlinput, ip_ctloutput,
udp_usrreq,
udp_init
0,
udp_init, 0, 0, 0,
&udp_usrreqs
},
{ SOCK_STREAM, &inetdomain, IPPROTO_TCP,
PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,

View File

@ -26,7 +26,7 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id$
* $Id: in_rmx.c,v 1.28 1997/02/13 19:46:41 wollman Exp $
*/
/*
@ -119,23 +119,6 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
}
}
/*
* We also specify a send and receive pipe size for every
* route added, to help TCP a bit. TCP doesn't actually
* want a true pipe size, which would be prohibitive in memory
* costs and is hard to compute anyway; it simply uses these
* values to size its buffers. So, we fill them in with the
* same values that TCP would have used anyway, and allow the
* installing program or the link layer to override these values
* as it sees fit. This will hopefully allow TCP more
* opportunities to save its ssthresh value.
*/
if (!rt->rt_rmx.rmx_sendpipe && !(rt->rt_rmx.rmx_locks & RTV_SPIPE))
rt->rt_rmx.rmx_sendpipe = tcp_sendspace;
if (!rt->rt_rmx.rmx_recvpipe && !(rt->rt_rmx.rmx_locks & RTV_RPIPE))
rt->rt_rmx.rmx_recvpipe = tcp_recvspace;
if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU)
&& rt->rt_ifp)
rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;

View File

@ -320,6 +320,7 @@ tcp_close(tp)
struct socket *so = inp->inp_socket;
register struct mbuf *m;
register struct rtentry *rt;
int dosavessthresh;
/*
* If we got enough samples through the srtt filter,
@ -364,15 +365,31 @@ tcp_close(tp)
tcpstat.tcps_cachedrttvar++;
}
/*
* The old comment here said:
* update the pipelimit (ssthresh) if it has been updated
* already or if a pipesize was specified & the threshhold
* got below half the pipesize. I.e., wait for bad news
* before we start updating, then update on both good
* and bad news.
*
* But we want to save the ssthresh even if no pipesize is
* specified explicitly in the route, because such
* connections still have an implicit pipesize specified
* by the global tcp_sendspace. In the absence of a reliable
* way to calculate the pipesize, it will have to do.
*/
i = tp->snd_ssthresh;
#if 1
if (rt->rt_rmx.rmx_sendpipe != 0)
dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
else
dosavessthresh = (i < so->so_snd.sb_hiwat / 2);
#else
dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
#endif
if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
((i = tp->snd_ssthresh) != 0) && rt->rt_rmx.rmx_ssthresh) ||
i < (rt->rt_rmx.rmx_sendpipe / 2)) {
i != 0 && rt->rt_rmx.rmx_ssthresh != 0)
|| dosavessthresh) {
/*
* convert the limit from user data bytes to
* packets then to packet data bytes.

View File

@ -320,6 +320,7 @@ tcp_close(tp)
struct socket *so = inp->inp_socket;
register struct mbuf *m;
register struct rtentry *rt;
int dosavessthresh;
/*
* If we got enough samples through the srtt filter,
@ -364,15 +365,31 @@ tcp_close(tp)
tcpstat.tcps_cachedrttvar++;
}
/*
* The old comment here said:
* update the pipelimit (ssthresh) if it has been updated
* already or if a pipesize was specified & the threshhold
* got below half the pipesize. I.e., wait for bad news
* before we start updating, then update on both good
* and bad news.
*
* But we want to save the ssthresh even if no pipesize is
* specified explicitly in the route, because such
* connections still have an implicit pipesize specified
* by the global tcp_sendspace. In the absence of a reliable
* way to calculate the pipesize, it will have to do.
*/
i = tp->snd_ssthresh;
#if 1
if (rt->rt_rmx.rmx_sendpipe != 0)
dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
else
dosavessthresh = (i < so->so_snd.sb_hiwat / 2);
#else
dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
#endif
if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
((i = tp->snd_ssthresh) != 0) && rt->rt_rmx.rmx_ssthresh) ||
i < (rt->rt_rmx.rmx_sendpipe / 2)) {
i != 0 && rt->rt_rmx.rmx_ssthresh != 0)
|| dosavessthresh) {
/*
* convert the limit from user data bytes to
* packets then to packet data bytes.

View File

@ -78,323 +78,6 @@ static struct tcpcb *
static struct tcpcb *
tcp_usrclosed __P((struct tcpcb *));
#ifdef notdef
/*
* Process a TCP user request for TCP tb. If this is a send request
* then m is the mbuf chain of send data. If this is a timer expiration
* (called from the software clock routine), then timertype tells which timer.
*
* This is the old single-entry request dispatcher: `req' selects the
* operation through one large switch. The meaning of m, nam and control
* depends on the request (several cases below reinterpret them with casts).
* Returns 0 or an errno value. The function sits inside #ifdef notdef,
* so it is not compiled.
*/
/*ARGSUSED*/
int
tcp_usrreq(so, req, m, nam, control)
struct socket *so;
int req;
struct mbuf *m, *nam, *control;
{
register struct inpcb *inp;
register struct tcpcb *tp = 0;
struct sockaddr_in *sinp;
int s;
int error = 0;
#ifdef TCPDEBUG
int ostate;
#endif
/*
* Control (ioctl-style) requests are handed straight to in_control();
* the three mbuf pointers are reinterpreted as its arguments.
*/
if (req == PRU_CONTROL)
return (in_control(so, (u_long)m, (caddr_t)nam,
(struct ifnet *)control));
/* Non-empty control data is rejected: free both chains and fail. */
if (control && control->m_len) {
m_freem(control);
if (m)
m_freem(m);
return (EINVAL);
}
/* Raised here; every return path below lowers it again with splx(s). */
s = splnet();
inp = sotoinpcb(so);
/*
* When a TCP is attached to a socket, then there will be
* a (struct inpcb) pointed at by the socket, and this
* structure will point at a subsidiary (struct tcpcb).
*/
if (inp == 0 && req != PRU_ATTACH) {
splx(s);
#if 0
/*
* The following corrects an mbuf leak under rare
* circumstances, but has not been fully tested.
*/
if (m && req != PRU_SENSE)
m_freem(m);
#else
/* safer version of fix for mbuf leak */
if (m && (req == PRU_SEND || req == PRU_SENDOOB))
m_freem(m);
#endif
return (EINVAL); /* XXX */
}
/*
* Note the brace structure: the `if (inp) {' block is closed in
* either the TCPDEBUG branch (with an else that zeroes ostate) or
* the non-TCPDEBUG branch below.
*/
if (inp) {
tp = intotcpcb(inp);
/* WHAT IF TP IS 0? */
#ifdef KPROF
tcp_acounts[tp->t_state][req]++;
#endif
#ifdef TCPDEBUG
ostate = tp->t_state;
} else
ostate = 0;
#else /* TCPDEBUG */
}
#endif /* TCPDEBUG */
switch (req) {
/*
* TCP attaches to socket via PRU_ATTACH, reserving space,
* and an internet control block.
*/
case PRU_ATTACH:
if (inp) {
error = EISCONN;
break;
}
error = tcp_attach(so);
if (error)
break;
if ((so->so_options & SO_LINGER) && so->so_linger == 0)
so->so_linger = TCP_LINGERTIME * hz;
tp = sototcpcb(so);
break;
/*
* PRU_DETACH detaches the TCP protocol from the socket.
* If the protocol state is non-embryonic, then can't
* do this directly: have to initiate a PRU_DISCONNECT,
* which may finish later; embryonic TCB's can just
* be discarded here.
*/
case PRU_DETACH:
if (tp->t_state > TCPS_LISTEN)
tp = tcp_disconnect(tp);
else
tp = tcp_close(tp);
break;
/*
* Give the socket an address.
*/
case PRU_BIND:
/*
* Must check for multicast addresses and disallow binding
* to them.
*/
sinp = mtod(nam, struct sockaddr_in *);
if (sinp->sin_family == AF_INET &&
IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
error = EAFNOSUPPORT;
break;
}
error = in_pcbbind(inp, nam);
/* The error check below is redundant: both paths just break. */
if (error)
break;
break;
/*
* Prepare to accept connections.
*/
case PRU_LISTEN:
if (inp->inp_lport == 0)
error = in_pcbbind(inp, NULL);
if (error == 0)
tp->t_state = TCPS_LISTEN;
break;
/*
* Initiate connection to peer.
* Create a template for use in transmissions on this connection.
* Enter SYN_SENT state, and mark socket as connecting.
* Start keep-alive timer, and seed output sequence space.
* Send initial segment on connection.
*/
case PRU_CONNECT:
/*
* Must disallow TCP ``connections'' to multicast addresses.
*/
sinp = mtod(nam, struct sockaddr_in *);
if (sinp->sin_family == AF_INET
&& IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
error = EAFNOSUPPORT;
break;
}
if ((error = tcp_connect(tp, nam)) != 0)
break;
error = tcp_output(tp);
break;
/*
* Create a TCP connection between two sockets.
*/
case PRU_CONNECT2:
error = EOPNOTSUPP;
break;
/*
* Initiate disconnect from peer.
* If connection never passed embryonic stage, just drop;
* else if don't need to let data drain, then can just drop anyways,
* else have to begin TCP shutdown process: mark socket disconnecting,
* drain unread data, state switch to reflect user close, and
* send segment (e.g. FIN) to peer. Socket will be really disconnected
* when peer sends FIN and acks ours.
*
* SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
*/
case PRU_DISCONNECT:
tp = tcp_disconnect(tp);
break;
/*
* Accept a connection. Essentially all the work is
* done at higher levels; just return the address
* of the peer, storing through addr.
*/
case PRU_ACCEPT:
in_setpeeraddr(inp, nam);
break;
/*
* Mark the connection as being incapable of further output.
*/
case PRU_SHUTDOWN:
socantsendmore(so);
tp = tcp_usrclosed(tp);
/* tcp_usrclosed() may return a null tcpcb; only send if one remains. */
if (tp)
error = tcp_output(tp);
break;
/*
* After a receive, possibly send window update to peer.
*/
case PRU_RCVD:
(void) tcp_output(tp);
break;
/*
* Do a send by putting data in output queue and updating urgent
* marker if URG set. Possibly send more data.
*/
case PRU_SEND_EOF:
case PRU_SEND:
sbappend(&so->so_snd, m);
if (nam && tp->t_state < TCPS_SYN_SENT) {
/*
* Do implied connect if not yet connected,
* initialize window to default value, and
* initialize maxseg/maxopd using peer's cached
* MSS.
*/
error = tcp_connect(tp, nam);
if (error)
break;
tp->snd_wnd = TTCP_CLIENT_SND_WND;
tcp_mss(tp, -1);
}
if (req == PRU_SEND_EOF) {
/*
* Close the send side of the connection after
* the data is sent.
*/
socantsendmore(so);
tp = tcp_usrclosed(tp);
}
if (tp != NULL)
error = tcp_output(tp);
break;
/*
* Abort the TCP.
*/
case PRU_ABORT:
tp = tcp_drop(tp, ECONNABORTED);
break;
/*
* For this request m is really a struct stat pointer. Returns
* directly, so the TCPDEBUG trace at the bottom is skipped.
*/
case PRU_SENSE:
((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
(void) splx(s);
return (0);
/*
* Read the single saved out-of-band byte (tp->t_iobc) into m.
* Here nam is cast to int and tested against MSG_PEEK.
*/
case PRU_RCVOOB:
if ((so->so_oobmark == 0 &&
(so->so_state & SS_RCVATMARK) == 0) ||
so->so_options & SO_OOBINLINE ||
tp->t_oobflags & TCPOOB_HADDATA) {
error = EINVAL;
break;
}
if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
error = EWOULDBLOCK;
break;
}
m->m_len = 1;
*mtod(m, caddr_t) = tp->t_iobc;
/* Unless peeking, flip both flags: HAVEDATA off, HADDATA on. */
if (((int)nam & MSG_PEEK) == 0)
tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
break;
case PRU_SENDOOB:
if (sbspace(&so->so_snd) < -512) {
m_freem(m);
error = ENOBUFS;
break;
}
/*
* According to RFC961 (Assigned Protocols),
* the urgent pointer points to the last octet
* of urgent data. We continue, however,
* to consider it to indicate the first octet
* of data past the urgent section.
* Otherwise, snd_up should be one lower.
*/
sbappend(&so->so_snd, m);
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
/* t_force is set only for the duration of this tcp_output() call. */
tp->t_force = 1;
error = tcp_output(tp);
tp->t_force = 0;
break;
case PRU_SOCKADDR:
in_setsockaddr(inp, nam);
break;
case PRU_PEERADDR:
in_setpeeraddr(inp, nam);
break;
/*
* TCP slow timer went off; going through this
* routine for tracing's sake.
* nam is not an mbuf here: it is cast to int and passed to tcp_timers().
*/
case PRU_SLOWTIMO:
tp = tcp_timers(tp, (int)nam);
#ifdef TCPDEBUG
req |= (int)nam << 8; /* for debug's sake */
#endif
break;
default:
panic("tcp_usrreq");
}
#ifdef TCPDEBUG
/* Trace only if a tcpcb is still present and SO_DEBUG is set. */
if (tp && (so->so_options & SO_DEBUG))
tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
#endif
splx(s);
return (error);
}
#endif
#ifdef TCPDEBUG
#define TCPDEBUG0 int ostate
#define TCPDEBUG1() ostate = tp ? tp->t_state : 0

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
* $FreeBSD$
* $Id$
*/
#include <sys/param.h>
@ -90,7 +90,6 @@ SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RD,
static struct sockaddr_in udp_in = { sizeof(udp_in), AF_INET };
static void udp_detach __P((struct inpcb *));
static int udp_output __P((struct inpcb *, struct mbuf *, struct mbuf *,
struct mbuf *));
static void udp_notify __P((struct inpcb *, int));
@ -464,150 +463,232 @@ static u_long udp_recvspace = 40 * (1024 + sizeof(struct sockaddr_in));
SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
&udp_recvspace, 0, "");
/*ARGSUSED*/
int
udp_usrreq(so, req, m, addr, control)
struct socket *so;
int req;
struct mbuf *m, *addr, *control;
static int
udp_abort(struct socket *so)
{
struct inpcb *inp = sotoinpcb(so);
int error = 0;
struct inpcb *inp;
int s;
if (req == PRU_CONTROL)
return (in_control(so, (u_long)m, (caddr_t)addr,
(struct ifnet *)control));
if (inp == NULL && req != PRU_ATTACH) {
error = EINVAL;
goto release;
}
/*
* Note: need to block udp_input while changing
* the udp pcb queue and/or pcb addresses.
*/
switch (req) {
case PRU_ATTACH:
if (inp != NULL) {
error = EINVAL;
break;
}
s = splnet();
error = in_pcballoc(so, &udbinfo);
splx(s);
if (error)
break;
error = soreserve(so, udp_sendspace, udp_recvspace);
if (error)
break;
((struct inpcb *) so->so_pcb)->inp_ip.ip_ttl = ip_defttl;
break;
case PRU_DETACH:
udp_detach(inp);
break;
case PRU_BIND:
s = splnet();
error = in_pcbbind(inp, addr);
splx(s);
break;
case PRU_LISTEN:
error = EOPNOTSUPP;
break;
case PRU_CONNECT:
if (inp->inp_faddr.s_addr != INADDR_ANY) {
error = EISCONN;
break;
}
s = splnet();
error = in_pcbconnect(inp, addr);
splx(s);
if (error == 0)
soisconnected(so);
break;
case PRU_CONNECT2:
error = EOPNOTSUPP;
break;
case PRU_ACCEPT:
error = EOPNOTSUPP;
break;
case PRU_DISCONNECT:
if (inp->inp_faddr.s_addr == INADDR_ANY) {
error = ENOTCONN;
break;
}
s = splnet();
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
splx(s);
so->so_state &= ~SS_ISCONNECTED; /* XXX */
break;
case PRU_SHUTDOWN:
socantsendmore(so);
break;
case PRU_SEND:
return (udp_output(inp, m, addr, control));
case PRU_ABORT:
soisdisconnected(so);
udp_detach(inp);
break;
case PRU_SOCKADDR:
in_setsockaddr(inp, addr);
break;
case PRU_PEERADDR:
in_setpeeraddr(inp, addr);
break;
case PRU_SENSE:
/*
* stat: don't bother with a blocksize.
*/
return (0);
case PRU_SENDOOB:
case PRU_FASTTIMO:
case PRU_SLOWTIMO:
case PRU_PROTORCV:
case PRU_PROTOSEND:
error = EOPNOTSUPP;
break;
case PRU_RCVD:
case PRU_RCVOOB:
return (EOPNOTSUPP); /* do not free mbuf's */
default:
panic("udp_usrreq");
}
release:
if (control) {
printf("udp control data unexpectedly retained\n");
m_freem(control);
}
if (m)
m_freem(m);
return (error);
}
static void
udp_detach(inp)
struct inpcb *inp;
{
int s = splnet();
inp = sotoinpcb(so);
if (inp == 0)
return EINVAL; /* ??? possible? panic instead? */
soisdisconnected(so);
s = splnet();
in_pcbdetach(inp);
splx(s);
return 0;
}
/*
 * UDP accept request: not supported.
 * Returns EINVAL when the socket has no inpcb, EOPNOTSUPP otherwise.
 * `nam' is unused.
 * XXX should be pru_accept_notsupp
 */
static int
udp_accept(struct socket *so, struct mbuf *nam)
{
	return (sotoinpcb(so) == 0) ? EINVAL : EOPNOTSUPP;
}
/*
 * Attach UDP protocol state to a socket.
 *
 * Returns EINVAL if the socket already has a pcb.  Otherwise reserves
 * udp_sendspace/udp_recvspace of socket buffer space, allocates an inpcb
 * on udbinfo (at splnet) and sets the new pcb's IP TTL to ip_defttl.
 * Returns 0 on success or the error from soreserve()/in_pcballoc().
 *
 * The reservation is done before in_pcballoc() so that a soreserve()
 * failure returns with no pcb attached to the socket, instead of
 * leaving an allocated pcb behind on an error return.
 * `proto' is unused.
 */
static int
udp_attach(struct socket *so, int proto)
{
	struct inpcb *inp;
	int s, error;

	inp = sotoinpcb(so);
	if (inp != 0)
		return EINVAL;

	error = soreserve(so, udp_sendspace, udp_recvspace);
	if (error)
		return error;

	s = splnet();
	error = in_pcballoc(so, &udbinfo);
	splx(s);
	if (error)
		return error;

	((struct inpcb *) so->so_pcb)->inp_ip.ip_ttl = ip_defttl;
	return 0;
}
/*
 * Bind the socket's inpcb to the address in `nam'.
 * Returns EINVAL if no inpcb is attached, otherwise the result of
 * in_pcbbind(), which is called at splnet.
 */
static int
udp_bind(struct socket *so, struct mbuf *nam)
{
	struct inpcb *pcb = sotoinpcb(so);
	int ipl, rc;

	if (pcb != 0) {
		ipl = splnet();
		rc = in_pcbbind(pcb, nam);
		splx(ipl);
	} else {
		rc = EINVAL;
	}
	return rc;
}
/*
 * Connect the socket's inpcb to the address in `nam'.
 * Returns EINVAL if no inpcb is attached and EISCONN if a foreign address
 * is already set.  On success the socket is marked connected; otherwise
 * the in_pcbconnect() error is returned.
 */
static int
udp_connect(struct socket *so, struct mbuf *nam)
{
	struct inpcb *pcb = sotoinpcb(so);
	int ipl, rc;

	if (pcb == 0)
		return EINVAL;
	if (pcb->inp_faddr.s_addr != INADDR_ANY)
		return EISCONN;

	ipl = splnet();
	rc = in_pcbconnect(pcb, nam);
	splx(ipl);

	if (rc != 0)
		return rc;
	soisconnected(so);
	return rc;
}
/*
 * Detach the inpcb from the socket via in_pcbdetach() at splnet.
 * Returns EINVAL if the socket has no inpcb, 0 otherwise.
 */
static int
udp_detach(struct socket *so)
{
	struct inpcb *pcb = sotoinpcb(so);
	int ipl;

	if (pcb != 0) {
		ipl = splnet();
		in_pcbdetach(pcb);
		splx(ipl);
		return 0;
	}
	return EINVAL;
}
/*
 * Disconnect a connected UDP socket.
 * Returns EINVAL if no inpcb is attached and ENOTCONN if no foreign
 * address is set.  Otherwise, at splnet, disconnects the pcb and resets
 * its local address to INADDR_ANY, then clears SS_ISCONNECTED.
 */
static int
udp_disconnect(struct socket *so)
{
	struct inpcb *pcb = sotoinpcb(so);
	int ipl;

	if (pcb == 0)
		return EINVAL;
	if (pcb->inp_faddr.s_addr == INADDR_ANY)
		return ENOTCONN;

	ipl = splnet();
	in_pcbdisconnect(pcb);
	pcb->inp_laddr.s_addr = INADDR_ANY;
	splx(ipl);

	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
	return 0;
}
/*
 * UDP listen request: not supported.
 * Returns EINVAL when the socket has no inpcb, EOPNOTSUPP otherwise.
 * should be pru_listen_notsupp
 */
static int
udp_listen(struct socket *so)
{
	if (sotoinpcb(so) != 0)
		return EOPNOTSUPP;
	return EINVAL;
}
/*
 * Report the peer address of the socket through `nam' using
 * in_setpeeraddr().  Returns EINVAL if no inpcb is attached, else 0.
 */
static int
udp_peeraddr(struct socket *so, struct mbuf *nam)
{
	struct inpcb *pcb = sotoinpcb(so);

	if (pcb != 0) {
		in_setpeeraddr(pcb, nam);
		return 0;
	}
	return EINVAL;
}
/*
 * UDP "data received" request: not supported.
 * Returns EINVAL when the socket has no inpcb, EOPNOTSUPP otherwise.
 * `flags' is unused.
 * XXX should be pru_rcvd_notsupp
 */
static int
udp_rcvd(struct socket *so, int flags)
{
	return (sotoinpcb(so) != 0) ? EOPNOTSUPP : EINVAL;
}
/*
 * UDP out-of-band receive request: not supported.
 * Returns EINVAL when the socket has no inpcb, EOPNOTSUPP otherwise.
 * `m' and `flags' are unused (and `m' is not freed here).
 * XXX should be pru_rcvoob_notsupp
 */
static int
udp_rcvoob(struct socket *so, struct mbuf *m, int flags)
{
	if (sotoinpcb(so) == 0)
		return EINVAL;
	else
		return EOPNOTSUPP;
}
/*
 * Send a datagram: hand the data, optional destination address and
 * optional control chain to udp_output().
 *
 * If the socket has no inpcb, nothing can be sent; both the data and the
 * control mbuf chains are released here before returning EINVAL, since
 * udp_output() is never reached on that path.  The previous single-entry
 * udp_usrreq() freed both chains on this error path; freeing only `m'
 * would leak any control chain.
 * `flags' is unused.
 */
static int
udp_send(struct socket *so, int flags, struct mbuf *m, struct mbuf *addr,
    struct mbuf *control)
{
	struct inpcb *inp;

	inp = sotoinpcb(so);
	if (inp == 0) {
		if (control)
			m_freem(control);
		if (m)
			m_freem(m);
		return EINVAL;
	}
	return udp_output(inp, m, addr, control);
}
/*
 * stat(2)-style request on a UDP socket.
 * Nothing is stored through `sb'; returns EINVAL when the socket has no
 * inpcb and 0 otherwise.
 */
static int
udp_sense(struct socket *so, struct stat *sb)
{
	/* xxx do something useful in the success case */
	return (sotoinpcb(so) == 0) ? EINVAL : 0;
}
/*
 * Shut down the sending side of the socket via socantsendmore().
 * Returns EINVAL if no inpcb is attached, else 0.
 */
static int
udp_shutdown(struct socket *so)
{
	if (sotoinpcb(so) != 0) {
		socantsendmore(so);
		return 0;
	}
	return EINVAL;
}
/*
 * Report the local address of the socket through `nam' using
 * in_setsockaddr().  Returns EINVAL if no inpcb is attached, else 0.
 */
static int
udp_sockaddr(struct socket *so, struct mbuf *nam)
{
	struct inpcb *pcb = sotoinpcb(so);

	if (pcb != 0) {
		in_setsockaddr(pcb, nam);
		return 0;
	}
	return EINVAL;
}
/*
 * Control request: forwarded unchanged to in_control(), whose result is
 * returned.  No inpcb check is made here.
 * XXX - should fix parameter types of in_control or of pru_control...
 */
static int
udp_control(struct socket *so, int cmd, caddr_t data, struct ifnet *ifp)
{
	int rc;

	rc = in_control(so, cmd, data, ifp);
	return rc;
}
/*
 * UDP's table of user-request handlers, referenced from the UDP entry of
 * the protocol switch.  The initializer is positional, so the order of
 * the entries must not be changed.
 */
struct pr_usrreqs udp_usrreqs = {
	udp_abort,
	udp_accept,
	udp_attach,
	udp_bind,
	udp_connect,
	pru_connect2_notsupp,
	udp_control,
	udp_detach,
	udp_disconnect,
	udp_listen,
	udp_peeraddr,
	udp_rcvd,
	udp_rcvoob,
	udp_send,
	udp_sense,
	udp_shutdown,
	udp_sockaddr
};

View File

@ -92,12 +92,11 @@ struct udpstat {
extern struct inpcbhead udb;
extern struct inpcbinfo udbinfo;
extern struct udpstat udpstat;
extern struct pr_usrreqs udp_usrreqs;
void udp_ctlinput __P((int, struct sockaddr *, void *));
void udp_init __P((void));
void udp_input __P((struct mbuf *, int));
int udp_usrreq __P((struct socket *,
int, struct mbuf *, struct mbuf *, struct mbuf *));
#endif
#endif