freebsd-dev/sys/netinet/tcp_timer.c
Garrett Wollman 2c37256e5a Modify the kernel to use the new pr_usrreqs interface rather than the old
pr_usrreq mechanism which was poorly designed and error-prone.  This
commit renames pr_usrreq to pr_ousrreq so that old code which depended on it
would break in an obvious manner.  This commit also implements the new
interface for TCP, although the old function is left as an example
(#ifdef'ed out).  This commit ALSO fixes a longstanding bug in the
TCP timer processing (introduced by davidg on 1995/04/12) which caused
timer processing on a TCB to always stop after a single timer had
expired (because it misinterpreted the return value from tcp_usrreq()
to indicate that the TCB had been deleted).  Finally, some code
related to polling has been deleted from if.c because it is not
relevant t -current and doesn't look at all like my current code.
1996-07-11 16:32:50 +00:00

379 lines
11 KiB
C

/*
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
* $Id: tcp_timer.c,v 1.17 1996/06/03 15:37:52 jdp Exp $
*/
#ifndef TUBA_INCLUDE
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/errno.h>
#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
int tcp_keepidle = TCPTV_KEEP_IDLE;
SYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle,
CTLFLAG_RW, &tcp_keepidle , 0, "");
static int tcp_keepintvl = TCPTV_KEEPINTVL;
SYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl,
CTLFLAG_RW, &tcp_keepintvl , 0, "");
static int always_keepalive = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive,
CTLFLAG_RW, &always_keepalive , 0, "");
static int tcp_keepcnt = TCPTV_KEEPCNT;
/* max idle probes */
static int tcp_maxpersistidle = TCPTV_KEEP_IDLE;
/* max idle time in persist */
int tcp_maxidle;
#else /* TUBA_INCLUDE */
static int tcp_maxpersistidle;
#endif /* TUBA_INCLUDE */
/*
* Fast timeout routine for processing delayed acks
*/
void
tcp_fasttimo()
{
register struct inpcb *inp;
register struct tcpcb *tp;
int s;
s = splnet();
for (inp = tcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) {
if ((tp = (struct tcpcb *)inp->inp_ppcb) &&
(tp->t_flags & TF_DELACK)) {
tp->t_flags &= ~TF_DELACK;
tp->t_flags |= TF_ACKNOW;
tcpstat.tcps_delack++;
(void) tcp_output(tp);
}
}
splx(s);
}
/*
* Tcp protocol timeout routine called every 500 ms.
* Updates the timers in all active tcb's and
* causes finite state machine actions if timers expire.
*/
void
tcp_slowtimo()
{
register struct inpcb *ip, *ipnxt;
register struct tcpcb *tp;
register int i;
int s;
#ifdef TCPDEBUG
int ostate;
#endif
s = splnet();
tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
ip = tcb.lh_first;
if (ip == NULL) {
splx(s);
return;
}
/*
* Search through tcb's and update active timers.
*/
for (; ip != NULL; ip = ipnxt) {
ipnxt = ip->inp_list.le_next;
tp = intotcpcb(ip);
if (tp == 0 || tp->t_state == TCPS_LISTEN)
continue;
for (i = 0; i < TCPT_NTIMERS; i++) {
if (tp->t_timer[i] && --tp->t_timer[i] == 0) {
#ifdef TCPDEBUG
ostate = tp->t_state;
#endif
tp = tcp_timers(tp, i);
if (tp == NULL)
goto tpgone;
#ifdef TCPDEBUG
if (tp->t_inpcb->inp_socket->so_options
& SO_DEBUG)
tcp_trace(TA_USER, ostate, tp,
(struct tcpiphdr *)0,
PRU_SLOWTIMO);
#endif
}
}
tp->t_idle++;
tp->t_duration++;
if (tp->t_rtt)
tp->t_rtt++;
tpgone:
;
}
tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */
#ifdef TCP_COMPAT_42
if ((int)tcp_iss < 0)
tcp_iss = TCP_ISSINCR; /* XXX */
#endif
tcp_now++; /* for timestamps */
splx(s);
}
#ifndef TUBA_INCLUDE
/*
* Cancel all timers for TCP tp.
*/
void
tcp_canceltimers(tp)
struct tcpcb *tp;
{
register int i;
for (i = 0; i < TCPT_NTIMERS; i++)
tp->t_timer[i] = 0;
}
int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
{ 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
static int tcp_totbackoff = 511; /* sum of tcp_backoff[] */
/*
* TCP timer processing.
*/
struct tcpcb *
tcp_timers(tp, timer)
register struct tcpcb *tp;
int timer;
{
register int rexmt;
switch (timer) {
/*
* 2 MSL timeout in shutdown went off. If we're closed but
* still waiting for peer to close and connection has been idle
* too long, or if 2MSL time is up from TIME_WAIT, delete connection
* control block. Otherwise, check again in a bit.
*/
case TCPT_2MSL:
if (tp->t_state != TCPS_TIME_WAIT &&
tp->t_idle <= tcp_maxidle)
tp->t_timer[TCPT_2MSL] = tcp_keepintvl;
else
tp = tcp_close(tp);
break;
/*
* Retransmission timer went off. Message has not
* been acked within retransmit interval. Back off
* to a longer retransmit interval and retransmit one segment.
*/
case TCPT_REXMT:
if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
tp->t_rxtshift = TCP_MAXRXTSHIFT;
tcpstat.tcps_timeoutdrop++;
tp = tcp_drop(tp, tp->t_softerror ?
tp->t_softerror : ETIMEDOUT);
break;
}
tcpstat.tcps_rexmttimeo++;
rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
TCPT_RANGESET(tp->t_rxtcur, rexmt,
tp->t_rttmin, TCPTV_REXMTMAX);
tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
/*
* If losing, let the lower level know and try for
* a better route. Also, if we backed off this far,
* our srtt estimate is probably bogus. Clobber it
* so we'll take the next rtt measurement as our srtt;
* move the current srtt into rttvar to keep the current
* retransmit times until then.
*/
if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
in_losing(tp->t_inpcb);
tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
tp->t_srtt = 0;
}
tp->snd_nxt = tp->snd_una;
/*
* Force a segment to be sent.
*/
tp->t_flags |= TF_ACKNOW;
/*
* If timing a segment in this window, stop the timer.
*/
tp->t_rtt = 0;
/*
* Close the congestion window down to one segment
* (we'll open it by one segment for each ack we get).
* Since we probably have a window's worth of unacked
* data accumulated, this "slow start" keeps us from
* dumping all that data as back-to-back packets (which
* might overwhelm an intermediate gateway).
*
* There are two phases to the opening: Initially we
* open by one mss on each ack. This makes the window
* size increase exponentially with time. If the
* window is larger than the path can handle, this
* exponential growth results in dropped packet(s)
* almost immediately. To get more time between
* drops but still "push" the network to take advantage
* of improving conditions, we switch from exponential
* to linear window opening at some threshhold size.
* For a threshhold, we use half the current window
* size, truncated to a multiple of the mss.
*
* (the minimum cwnd that will give us exponential
* growth is 2 mss. We don't allow the threshhold
* to go below this.)
*/
{
u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
if (win < 2)
win = 2;
tp->snd_cwnd = tp->t_maxseg;
tp->snd_ssthresh = win * tp->t_maxseg;
tp->t_dupacks = 0;
}
(void) tcp_output(tp);
break;
/*
* Persistance timer into zero window.
* Force a byte to be output, if possible.
*/
case TCPT_PERSIST:
tcpstat.tcps_persisttimeo++;
/*
* Hack: if the peer is dead/unreachable, we do not
* time out if the window is closed. After a full
* backoff, drop the connection if the idle time
* (no responses to probes) reaches the maximum
* backoff that we would use if retransmitting.
*/
if (tp->t_rxtshift == TCP_MAXRXTSHIFT) {
u_long maxidle = TCP_REXMTVAL(tp);
if (maxidle < tp->t_rttmin)
maxidle = tp->t_rttmin;
maxidle *= tcp_totbackoff;
if (tp->t_idle >= tcp_maxpersistidle ||
tp->t_idle >= maxidle) {
tcpstat.tcps_persistdrop++;
tp = tcp_drop(tp, ETIMEDOUT);
break;
}
}
tcp_setpersist(tp);
tp->t_force = 1;
(void) tcp_output(tp);
tp->t_force = 0;
break;
/*
* Keep-alive timer went off; send something
* or drop connection if idle for too long.
*/
case TCPT_KEEP:
tcpstat.tcps_keeptimeo++;
if (tp->t_state < TCPS_ESTABLISHED)
goto dropit;
if ((always_keepalive ||
tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) &&
tp->t_state <= TCPS_CLOSING) {
if (tp->t_idle >= tcp_keepidle + tcp_maxidle)
goto dropit;
/*
* Send a packet designed to force a response
* if the peer is up and reachable:
* either an ACK if the connection is still alive,
* or an RST if the peer has closed the connection
* due to timeout or reboot.
* Using sequence number tp->snd_una-1
* causes the transmitted zero-length segment
* to lie outside the receive window;
* by the protocol spec, this requires the
* correspondent TCP to respond.
*/
tcpstat.tcps_keepprobe++;
#ifdef TCP_COMPAT_42
/*
* The keepalive packet must have nonzero length
* to get a 4.2 host to respond.
*/
tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
tp->rcv_nxt - 1, tp->snd_una - 1, 0);
#else
tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
tp->rcv_nxt, tp->snd_una - 1, 0);
#endif
tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
} else
tp->t_timer[TCPT_KEEP] = tcp_keepidle;
break;
dropit:
tcpstat.tcps_keepdrops++;
tp = tcp_drop(tp, ETIMEDOUT);
break;
}
return (tp);
}
#endif /* TUBA_INCLUDE */